{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.069,
  "eval_steps": 500,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002,
      "grad_norm": 17.97269058227539,
      "learning_rate": 2e-08,
      "loss": 0.8167,
      "step": 1
    },
    {
      "epoch": 0.0004,
      "grad_norm": 17.09549331665039,
      "learning_rate": 4e-08,
      "loss": 0.8546,
      "step": 2
    },
    {
      "epoch": 0.0006,
      "grad_norm": 16.300983428955078,
      "learning_rate": 6.000000000000001e-08,
      "loss": 0.8395,
      "step": 3
    },
    {
      "epoch": 0.0008,
      "grad_norm": 17.731111526489258,
      "learning_rate": 8e-08,
      "loss": 0.8239,
      "step": 4
    },
    {
      "epoch": 0.001,
      "grad_norm": 17.372011184692383,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 0.8181,
      "step": 5
    },
    {
      "epoch": 0.0012,
      "grad_norm": 17.901464462280273,
      "learning_rate": 1.2000000000000002e-07,
      "loss": 0.8178,
      "step": 6
    },
    {
      "epoch": 0.0014,
      "grad_norm": 18.532596588134766,
      "learning_rate": 1.4e-07,
      "loss": 0.7867,
      "step": 7
    },
    {
      "epoch": 0.0016,
      "grad_norm": 18.095897674560547,
      "learning_rate": 1.6e-07,
      "loss": 0.827,
      "step": 8
    },
    {
      "epoch": 0.0018,
      "grad_norm": 18.920656204223633,
      "learning_rate": 1.8e-07,
      "loss": 0.7756,
      "step": 9
    },
    {
      "epoch": 0.002,
      "grad_norm": 17.61746597290039,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 0.8204,
      "step": 10
    },
    {
      "epoch": 0.0022,
      "grad_norm": 17.334989547729492,
      "learning_rate": 2.2e-07,
      "loss": 0.8228,
      "step": 11
    },
    {
      "epoch": 0.0024,
      "grad_norm": 18.79360580444336,
      "learning_rate": 2.4000000000000003e-07,
      "loss": 0.8088,
      "step": 12
    },
    {
      "epoch": 0.0026,
      "grad_norm": 17.502933502197266,
      "learning_rate": 2.6e-07,
      "loss": 0.8062,
      "step": 13
    },
    {
      "epoch": 0.0028,
      "grad_norm": 16.895002365112305,
      "learning_rate": 2.8e-07,
      "loss": 0.8223,
      "step": 14
    },
    {
      "epoch": 0.003,
      "grad_norm": 17.6730899810791,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 0.8076,
      "step": 15
    },
    {
      "epoch": 0.0032,
      "grad_norm": 17.09043312072754,
      "learning_rate": 3.2e-07,
      "loss": 0.7957,
      "step": 16
    },
    {
      "epoch": 0.0034,
      "grad_norm": 17.19906997680664,
      "learning_rate": 3.4000000000000003e-07,
      "loss": 0.7719,
      "step": 17
    },
    {
      "epoch": 0.0036,
      "grad_norm": 18.258024215698242,
      "learning_rate": 3.6e-07,
      "loss": 0.7677,
      "step": 18
    },
    {
      "epoch": 0.0038,
      "grad_norm": 16.483152389526367,
      "learning_rate": 3.8e-07,
      "loss": 0.8058,
      "step": 19
    },
    {
      "epoch": 0.004,
      "grad_norm": 16.420392990112305,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.7845,
      "step": 20
    },
    {
      "epoch": 0.0042,
      "grad_norm": 16.849567413330078,
      "learning_rate": 4.2000000000000006e-07,
      "loss": 0.7266,
      "step": 21
    },
    {
      "epoch": 0.0044,
      "grad_norm": 14.625004768371582,
      "learning_rate": 4.4e-07,
      "loss": 0.7221,
      "step": 22
    },
    {
      "epoch": 0.0046,
      "grad_norm": 13.381985664367676,
      "learning_rate": 4.6000000000000004e-07,
      "loss": 0.7395,
      "step": 23
    },
    {
      "epoch": 0.0048,
      "grad_norm": 14.497766494750977,
      "learning_rate": 4.800000000000001e-07,
      "loss": 0.7101,
      "step": 24
    },
    {
      "epoch": 0.005,
      "grad_norm": 13.240507125854492,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.7618,
      "step": 25
    },
    {
      "epoch": 0.0052,
      "grad_norm": 12.958215713500977,
      "learning_rate": 5.2e-07,
      "loss": 0.7476,
      "step": 26
    },
    {
      "epoch": 0.0054,
      "grad_norm": 13.203178405761719,
      "learning_rate": 5.4e-07,
      "loss": 0.7303,
      "step": 27
    },
    {
      "epoch": 0.0056,
      "grad_norm": 13.179447174072266,
      "learning_rate": 5.6e-07,
      "loss": 0.6991,
      "step": 28
    },
    {
      "epoch": 0.0058,
      "grad_norm": 12.315122604370117,
      "learning_rate": 5.800000000000001e-07,
      "loss": 0.6904,
      "step": 29
    },
    {
      "epoch": 0.006,
      "grad_norm": 6.784637451171875,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.6437,
      "step": 30
    },
    {
      "epoch": 0.0062,
      "grad_norm": 6.480414390563965,
      "learning_rate": 6.200000000000001e-07,
      "loss": 0.6424,
      "step": 31
    },
    {
      "epoch": 0.0064,
      "grad_norm": 5.988020896911621,
      "learning_rate": 6.4e-07,
      "loss": 0.6354,
      "step": 32
    },
    {
      "epoch": 0.0066,
      "grad_norm": 6.273484230041504,
      "learning_rate": 6.6e-07,
      "loss": 0.5735,
      "step": 33
    },
    {
      "epoch": 0.0068,
      "grad_norm": 6.120683193206787,
      "learning_rate": 6.800000000000001e-07,
      "loss": 0.6357,
      "step": 34
    },
    {
      "epoch": 0.007,
      "grad_norm": 6.0412397384643555,
      "learning_rate": 7.000000000000001e-07,
      "loss": 0.6151,
      "step": 35
    },
    {
      "epoch": 0.0072,
      "grad_norm": 5.526604175567627,
      "learning_rate": 7.2e-07,
      "loss": 0.5895,
      "step": 36
    },
    {
      "epoch": 0.0074,
      "grad_norm": 5.2901716232299805,
      "learning_rate": 7.4e-07,
      "loss": 0.6205,
      "step": 37
    },
    {
      "epoch": 0.0076,
      "grad_norm": 5.131118297576904,
      "learning_rate": 7.6e-07,
      "loss": 0.6118,
      "step": 38
    },
    {
      "epoch": 0.0078,
      "grad_norm": 4.498150825500488,
      "learning_rate": 7.8e-07,
      "loss": 0.598,
      "step": 39
    },
    {
      "epoch": 0.008,
      "grad_norm": 3.5042121410369873,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.5298,
      "step": 40
    },
    {
      "epoch": 0.0082,
      "grad_norm": 2.486785650253296,
      "learning_rate": 8.200000000000001e-07,
      "loss": 0.5166,
      "step": 41
    },
    {
      "epoch": 0.0084,
      "grad_norm": 2.0915184020996094,
      "learning_rate": 8.400000000000001e-07,
      "loss": 0.4809,
      "step": 42
    },
    {
      "epoch": 0.0086,
      "grad_norm": 2.011510133743286,
      "learning_rate": 8.6e-07,
      "loss": 0.5003,
      "step": 43
    },
    {
      "epoch": 0.0088,
      "grad_norm": 1.8339259624481201,
      "learning_rate": 8.8e-07,
      "loss": 0.4743,
      "step": 44
    },
    {
      "epoch": 0.009,
      "grad_norm": 1.8110532760620117,
      "learning_rate": 9.000000000000001e-07,
      "loss": 0.5289,
      "step": 45
    },
    {
      "epoch": 0.0092,
      "grad_norm": 1.7591607570648193,
      "learning_rate": 9.200000000000001e-07,
      "loss": 0.5078,
      "step": 46
    },
    {
      "epoch": 0.0094,
      "grad_norm": 1.5080705881118774,
      "learning_rate": 9.400000000000001e-07,
      "loss": 0.4792,
      "step": 47
    },
    {
      "epoch": 0.0096,
      "grad_norm": 1.5115655660629272,
      "learning_rate": 9.600000000000001e-07,
      "loss": 0.4529,
      "step": 48
    },
    {
      "epoch": 0.0098,
      "grad_norm": 1.489723563194275,
      "learning_rate": 9.800000000000001e-07,
      "loss": 0.5206,
      "step": 49
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4680073261260986,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.5072,
      "step": 50
    },
    {
      "epoch": 0.0102,
      "grad_norm": 1.371167778968811,
      "learning_rate": 1.02e-06,
      "loss": 0.4273,
      "step": 51
    },
    {
      "epoch": 0.0104,
      "grad_norm": 2.7187724113464355,
      "learning_rate": 1.04e-06,
      "loss": 0.5109,
      "step": 52
    },
    {
      "epoch": 0.0106,
      "grad_norm": 0.9156416058540344,
      "learning_rate": 1.06e-06,
      "loss": 0.4725,
      "step": 53
    },
    {
      "epoch": 0.0108,
      "grad_norm": 0.8322800993919373,
      "learning_rate": 1.08e-06,
      "loss": 0.4608,
      "step": 54
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.652372419834137,
      "learning_rate": 1.1e-06,
      "loss": 0.4015,
      "step": 55
    },
    {
      "epoch": 0.0112,
      "grad_norm": 0.6940380334854126,
      "learning_rate": 1.12e-06,
      "loss": 0.431,
      "step": 56
    },
    {
      "epoch": 0.0114,
      "grad_norm": 0.6117832064628601,
      "learning_rate": 1.14e-06,
      "loss": 0.4102,
      "step": 57
    },
    {
      "epoch": 0.0116,
      "grad_norm": 0.6187627911567688,
      "learning_rate": 1.1600000000000001e-06,
      "loss": 0.4572,
      "step": 58
    },
    {
      "epoch": 0.0118,
      "grad_norm": 0.6884448528289795,
      "learning_rate": 1.1800000000000001e-06,
      "loss": 0.4329,
      "step": 59
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.6142067909240723,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.4553,
      "step": 60
    },
    {
      "epoch": 0.0122,
      "grad_norm": 0.611631453037262,
      "learning_rate": 1.2200000000000002e-06,
      "loss": 0.4168,
      "step": 61
    },
    {
      "epoch": 0.0124,
      "grad_norm": 0.8171571493148804,
      "learning_rate": 1.2400000000000002e-06,
      "loss": 0.424,
      "step": 62
    },
    {
      "epoch": 0.0126,
      "grad_norm": 0.5115475654602051,
      "learning_rate": 1.26e-06,
      "loss": 0.3565,
      "step": 63
    },
    {
      "epoch": 0.0128,
      "grad_norm": 0.6233605742454529,
      "learning_rate": 1.28e-06,
      "loss": 0.4151,
      "step": 64
    },
    {
      "epoch": 0.013,
      "grad_norm": 0.47658413648605347,
      "learning_rate": 1.3e-06,
      "loss": 0.3838,
      "step": 65
    },
    {
      "epoch": 0.0132,
      "grad_norm": 0.6022032499313354,
      "learning_rate": 1.32e-06,
      "loss": 0.4034,
      "step": 66
    },
    {
      "epoch": 0.0134,
      "grad_norm": 0.5471862554550171,
      "learning_rate": 1.34e-06,
      "loss": 0.4004,
      "step": 67
    },
    {
      "epoch": 0.0136,
      "grad_norm": 0.4840533435344696,
      "learning_rate": 1.3600000000000001e-06,
      "loss": 0.3914,
      "step": 68
    },
    {
      "epoch": 0.0138,
      "grad_norm": 0.7452861070632935,
      "learning_rate": 1.3800000000000001e-06,
      "loss": 0.4132,
      "step": 69
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.504267692565918,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.3992,
      "step": 70
    },
    {
      "epoch": 0.0142,
      "grad_norm": 0.659186840057373,
      "learning_rate": 1.42e-06,
      "loss": 0.4238,
      "step": 71
    },
    {
      "epoch": 0.0144,
      "grad_norm": 0.47695696353912354,
      "learning_rate": 1.44e-06,
      "loss": 0.4181,
      "step": 72
    },
    {
      "epoch": 0.0146,
      "grad_norm": 0.590043306350708,
      "learning_rate": 1.46e-06,
      "loss": 0.4276,
      "step": 73
    },
    {
      "epoch": 0.0148,
      "grad_norm": 0.4621325433254242,
      "learning_rate": 1.48e-06,
      "loss": 0.4124,
      "step": 74
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.5502806305885315,
      "learning_rate": 1.5e-06,
      "loss": 0.4216,
      "step": 75
    },
    {
      "epoch": 0.0152,
      "grad_norm": 0.5027847290039062,
      "learning_rate": 1.52e-06,
      "loss": 0.4263,
      "step": 76
    },
    {
      "epoch": 0.0154,
      "grad_norm": 0.6224135756492615,
      "learning_rate": 1.54e-06,
      "loss": 0.3762,
      "step": 77
    },
    {
      "epoch": 0.0156,
      "grad_norm": 0.5272960066795349,
      "learning_rate": 1.56e-06,
      "loss": 0.3933,
      "step": 78
    },
    {
      "epoch": 0.0158,
      "grad_norm": 0.45190051198005676,
      "learning_rate": 1.5800000000000001e-06,
      "loss": 0.3692,
      "step": 79
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.534759521484375,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.3938,
      "step": 80
    },
    {
      "epoch": 0.0162,
      "grad_norm": 0.5399959683418274,
      "learning_rate": 1.6200000000000002e-06,
      "loss": 0.3846,
      "step": 81
    },
    {
      "epoch": 0.0164,
      "grad_norm": 0.4664633274078369,
      "learning_rate": 1.6400000000000002e-06,
      "loss": 0.4163,
      "step": 82
    },
    {
      "epoch": 0.0166,
      "grad_norm": 1.0736939907073975,
      "learning_rate": 1.6600000000000002e-06,
      "loss": 0.4128,
      "step": 83
    },
    {
      "epoch": 0.0168,
      "grad_norm": 0.5936366319656372,
      "learning_rate": 1.6800000000000002e-06,
      "loss": 0.3433,
      "step": 84
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.4042677879333496,
      "learning_rate": 1.7000000000000002e-06,
      "loss": 0.3734,
      "step": 85
    },
    {
      "epoch": 0.0172,
      "grad_norm": 0.43845823407173157,
      "learning_rate": 1.72e-06,
      "loss": 0.3925,
      "step": 86
    },
    {
      "epoch": 0.0174,
      "grad_norm": 0.4913782775402069,
      "learning_rate": 1.74e-06,
      "loss": 0.3947,
      "step": 87
    },
    {
      "epoch": 0.0176,
      "grad_norm": 0.47318845987319946,
      "learning_rate": 1.76e-06,
      "loss": 0.4016,
      "step": 88
    },
    {
      "epoch": 0.0178,
      "grad_norm": 0.5881785154342651,
      "learning_rate": 1.7800000000000001e-06,
      "loss": 0.3652,
      "step": 89
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.551909863948822,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 0.3789,
      "step": 90
    },
    {
      "epoch": 0.0182,
      "grad_norm": 0.48561134934425354,
      "learning_rate": 1.8200000000000002e-06,
      "loss": 0.3613,
      "step": 91
    },
    {
      "epoch": 0.0184,
      "grad_norm": 0.48683464527130127,
      "learning_rate": 1.8400000000000002e-06,
      "loss": 0.3483,
      "step": 92
    },
    {
      "epoch": 0.0186,
      "grad_norm": 0.37006062269210815,
      "learning_rate": 1.8600000000000002e-06,
      "loss": 0.3793,
      "step": 93
    },
    {
      "epoch": 0.0188,
      "grad_norm": 0.44092586636543274,
      "learning_rate": 1.8800000000000002e-06,
      "loss": 0.4029,
      "step": 94
    },
    {
      "epoch": 0.019,
      "grad_norm": 0.6408613920211792,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 0.4543,
      "step": 95
    },
    {
      "epoch": 0.0192,
      "grad_norm": 0.5106256008148193,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 0.3707,
      "step": 96
    },
    {
      "epoch": 0.0194,
      "grad_norm": 0.5857816338539124,
      "learning_rate": 1.94e-06,
      "loss": 0.4017,
      "step": 97
    },
    {
      "epoch": 0.0196,
      "grad_norm": 0.5307949185371399,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 0.3993,
      "step": 98
    },
    {
      "epoch": 0.0198,
      "grad_norm": 0.5702608227729797,
      "learning_rate": 1.98e-06,
      "loss": 0.4083,
      "step": 99
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.566071629524231,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.3895,
      "step": 100
    },
    {
      "epoch": 0.0202,
      "grad_norm": 0.43044522404670715,
      "learning_rate": 2.02e-06,
      "loss": 0.383,
      "step": 101
    },
    {
      "epoch": 0.0204,
      "grad_norm": 0.37991130352020264,
      "learning_rate": 2.04e-06,
      "loss": 0.3644,
      "step": 102
    },
    {
      "epoch": 0.0206,
      "grad_norm": 0.45190778374671936,
      "learning_rate": 2.06e-06,
      "loss": 0.3699,
      "step": 103
    },
    {
      "epoch": 0.0208,
      "grad_norm": 0.8487486243247986,
      "learning_rate": 2.08e-06,
      "loss": 0.3237,
      "step": 104
    },
    {
      "epoch": 0.021,
      "grad_norm": 0.5039421319961548,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.3652,
      "step": 105
    },
    {
      "epoch": 0.0212,
      "grad_norm": 0.5598636865615845,
      "learning_rate": 2.12e-06,
      "loss": 0.379,
      "step": 106
    },
    {
      "epoch": 0.0214,
      "grad_norm": 0.40302714705467224,
      "learning_rate": 2.1400000000000003e-06,
      "loss": 0.355,
      "step": 107
    },
    {
      "epoch": 0.0216,
      "grad_norm": 0.454899400472641,
      "learning_rate": 2.16e-06,
      "loss": 0.3977,
      "step": 108
    },
    {
      "epoch": 0.0218,
      "grad_norm": 0.5021453499794006,
      "learning_rate": 2.1800000000000003e-06,
      "loss": 0.322,
      "step": 109
    },
    {
      "epoch": 0.022,
      "grad_norm": 0.8794844746589661,
      "learning_rate": 2.2e-06,
      "loss": 0.4249,
      "step": 110
    },
    {
      "epoch": 0.0222,
      "grad_norm": 0.4198489785194397,
      "learning_rate": 2.2200000000000003e-06,
      "loss": 0.3669,
      "step": 111
    },
    {
      "epoch": 0.0224,
      "grad_norm": 0.4584365785121918,
      "learning_rate": 2.24e-06,
      "loss": 0.3622,
      "step": 112
    },
    {
      "epoch": 0.0226,
      "grad_norm": 0.4323468804359436,
      "learning_rate": 2.2600000000000004e-06,
      "loss": 0.3745,
      "step": 113
    },
    {
      "epoch": 0.0228,
      "grad_norm": 0.7490417957305908,
      "learning_rate": 2.28e-06,
      "loss": 0.3832,
      "step": 114
    },
    {
      "epoch": 0.023,
      "grad_norm": 0.4236067235469818,
      "learning_rate": 2.3000000000000004e-06,
      "loss": 0.371,
      "step": 115
    },
    {
      "epoch": 0.0232,
      "grad_norm": 0.48021242022514343,
      "learning_rate": 2.3200000000000002e-06,
      "loss": 0.4046,
      "step": 116
    },
    {
      "epoch": 0.0234,
      "grad_norm": 0.6185507774353027,
      "learning_rate": 2.3400000000000005e-06,
      "loss": 0.3886,
      "step": 117
    },
    {
      "epoch": 0.0236,
      "grad_norm": 0.5042728185653687,
      "learning_rate": 2.3600000000000003e-06,
      "loss": 0.3564,
      "step": 118
    },
    {
      "epoch": 0.0238,
      "grad_norm": 0.4533032178878784,
      "learning_rate": 2.38e-06,
      "loss": 0.3459,
      "step": 119
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.4727221429347992,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.4146,
      "step": 120
    },
    {
      "epoch": 0.0242,
      "grad_norm": 0.4660988748073578,
      "learning_rate": 2.42e-06,
      "loss": 0.3662,
      "step": 121
    },
    {
      "epoch": 0.0244,
      "grad_norm": 0.6129406094551086,
      "learning_rate": 2.4400000000000004e-06,
      "loss": 0.4127,
      "step": 122
    },
    {
      "epoch": 0.0246,
      "grad_norm": 0.7617100477218628,
      "learning_rate": 2.46e-06,
      "loss": 0.3333,
      "step": 123
    },
    {
      "epoch": 0.0248,
      "grad_norm": 0.4653623104095459,
      "learning_rate": 2.4800000000000004e-06,
      "loss": 0.3957,
      "step": 124
    },
    {
      "epoch": 0.025,
      "grad_norm": 0.5935105085372925,
      "learning_rate": 2.5e-06,
      "loss": 0.3693,
      "step": 125
    },
    {
      "epoch": 0.0252,
      "grad_norm": 0.4042361080646515,
      "learning_rate": 2.52e-06,
      "loss": 0.3345,
      "step": 126
    },
    {
      "epoch": 0.0254,
      "grad_norm": 0.5434343814849854,
      "learning_rate": 2.5400000000000002e-06,
      "loss": 0.3548,
      "step": 127
    },
    {
      "epoch": 0.0256,
      "grad_norm": 0.4848114252090454,
      "learning_rate": 2.56e-06,
      "loss": 0.3683,
      "step": 128
    },
    {
      "epoch": 0.0258,
      "grad_norm": 0.4947889447212219,
      "learning_rate": 2.5800000000000003e-06,
      "loss": 0.3764,
      "step": 129
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.5272525548934937,
      "learning_rate": 2.6e-06,
      "loss": 0.3847,
      "step": 130
    },
    {
      "epoch": 0.0262,
      "grad_norm": 0.4667666554450989,
      "learning_rate": 2.6200000000000003e-06,
      "loss": 0.3662,
      "step": 131
    },
    {
      "epoch": 0.0264,
      "grad_norm": 0.48736771941185,
      "learning_rate": 2.64e-06,
      "loss": 0.3937,
      "step": 132
    },
    {
      "epoch": 0.0266,
      "grad_norm": 0.4935629367828369,
      "learning_rate": 2.6600000000000004e-06,
      "loss": 0.393,
      "step": 133
    },
    {
      "epoch": 0.0268,
      "grad_norm": 0.42808762192726135,
      "learning_rate": 2.68e-06,
      "loss": 0.3534,
      "step": 134
    },
    {
      "epoch": 0.027,
      "grad_norm": 0.5305779576301575,
      "learning_rate": 2.7000000000000004e-06,
      "loss": 0.3735,
      "step": 135
    },
    {
      "epoch": 0.0272,
      "grad_norm": 0.5294774174690247,
      "learning_rate": 2.7200000000000002e-06,
      "loss": 0.3591,
      "step": 136
    },
    {
      "epoch": 0.0274,
      "grad_norm": 0.4570407569408417,
      "learning_rate": 2.7400000000000004e-06,
      "loss": 0.3968,
      "step": 137
    },
    {
      "epoch": 0.0276,
      "grad_norm": 0.45037782192230225,
      "learning_rate": 2.7600000000000003e-06,
      "loss": 0.3442,
      "step": 138
    },
    {
      "epoch": 0.0278,
      "grad_norm": 0.4974113702774048,
      "learning_rate": 2.7800000000000005e-06,
      "loss": 0.3745,
      "step": 139
    },
    {
      "epoch": 0.028,
      "grad_norm": 0.6906246542930603,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.362,
      "step": 140
    },
    {
      "epoch": 0.0282,
      "grad_norm": 0.783227801322937,
      "learning_rate": 2.82e-06,
      "loss": 0.3392,
      "step": 141
    },
    {
      "epoch": 0.0284,
      "grad_norm": 0.48793527483940125,
      "learning_rate": 2.84e-06,
      "loss": 0.3756,
      "step": 142
    },
    {
      "epoch": 0.0286,
      "grad_norm": 0.4634222388267517,
      "learning_rate": 2.86e-06,
      "loss": 0.4002,
      "step": 143
    },
    {
      "epoch": 0.0288,
      "grad_norm": 0.45307955145835876,
      "learning_rate": 2.88e-06,
      "loss": 0.3839,
      "step": 144
    },
    {
      "epoch": 0.029,
      "grad_norm": 0.6718571782112122,
      "learning_rate": 2.9e-06,
      "loss": 0.3666,
      "step": 145
    },
    {
      "epoch": 0.0292,
      "grad_norm": 0.5100830793380737,
      "learning_rate": 2.92e-06,
      "loss": 0.3933,
      "step": 146
    },
    {
      "epoch": 0.0294,
      "grad_norm": 0.39325565099716187,
      "learning_rate": 2.9400000000000002e-06,
      "loss": 0.3643,
      "step": 147
    },
    {
      "epoch": 0.0296,
      "grad_norm": 0.5235859155654907,
      "learning_rate": 2.96e-06,
      "loss": 0.3282,
      "step": 148
    },
    {
      "epoch": 0.0298,
      "grad_norm": 0.5658304691314697,
      "learning_rate": 2.9800000000000003e-06,
      "loss": 0.3582,
      "step": 149
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5299038887023926,
      "learning_rate": 3e-06,
      "loss": 0.3442,
      "step": 150
    },
    {
      "epoch": 0.0302,
      "grad_norm": 0.4664023816585541,
      "learning_rate": 3.0200000000000003e-06,
      "loss": 0.3936,
      "step": 151
    },
    {
      "epoch": 0.0304,
      "grad_norm": 0.48280373215675354,
      "learning_rate": 3.04e-06,
      "loss": 0.3713,
      "step": 152
    },
    {
      "epoch": 0.0306,
      "grad_norm": 0.5745037198066711,
      "learning_rate": 3.0600000000000003e-06,
      "loss": 0.3647,
      "step": 153
    },
    {
      "epoch": 0.0308,
      "grad_norm": 0.5101187825202942,
      "learning_rate": 3.08e-06,
      "loss": 0.3605,
      "step": 154
    },
    {
      "epoch": 0.031,
      "grad_norm": 0.5530501008033752,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.3568,
      "step": 155
    },
    {
      "epoch": 0.0312,
      "grad_norm": 0.5654841661453247,
      "learning_rate": 3.12e-06,
      "loss": 0.3743,
      "step": 156
    },
    {
      "epoch": 0.0314,
      "grad_norm": 0.4923531115055084,
      "learning_rate": 3.1400000000000004e-06,
      "loss": 0.3801,
      "step": 157
    },
    {
      "epoch": 0.0316,
      "grad_norm": 0.5236830711364746,
      "learning_rate": 3.1600000000000002e-06,
      "loss": 0.4096,
      "step": 158
    },
    {
      "epoch": 0.0318,
      "grad_norm": 0.40548253059387207,
      "learning_rate": 3.1800000000000005e-06,
      "loss": 0.3198,
      "step": 159
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.49229100346565247,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.3812,
      "step": 160
    },
    {
      "epoch": 0.0322,
      "grad_norm": 0.4936654269695282,
      "learning_rate": 3.2200000000000005e-06,
      "loss": 0.3878,
      "step": 161
    },
    {
      "epoch": 0.0324,
      "grad_norm": 0.43520358204841614,
      "learning_rate": 3.2400000000000003e-06,
      "loss": 0.3647,
      "step": 162
    },
    {
      "epoch": 0.0326,
      "grad_norm": 0.5249994397163391,
      "learning_rate": 3.2600000000000006e-06,
      "loss": 0.3945,
      "step": 163
    },
    {
      "epoch": 0.0328,
      "grad_norm": 0.4600265920162201,
      "learning_rate": 3.2800000000000004e-06,
      "loss": 0.4077,
      "step": 164
    },
    {
      "epoch": 0.033,
      "grad_norm": 0.5561789274215698,
      "learning_rate": 3.3000000000000006e-06,
      "loss": 0.3871,
      "step": 165
    },
    {
      "epoch": 0.0332,
      "grad_norm": 0.4665578007698059,
      "learning_rate": 3.3200000000000004e-06,
      "loss": 0.3888,
      "step": 166
    },
    {
      "epoch": 0.0334,
      "grad_norm": 0.43751949071884155,
      "learning_rate": 3.3400000000000006e-06,
      "loss": 0.3595,
      "step": 167
    },
    {
      "epoch": 0.0336,
      "grad_norm": 0.573382556438446,
      "learning_rate": 3.3600000000000004e-06,
      "loss": 0.3931,
      "step": 168
    },
    {
      "epoch": 0.0338,
      "grad_norm": 0.517301082611084,
      "learning_rate": 3.3800000000000007e-06,
      "loss": 0.3449,
      "step": 169
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.520264208316803,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.3787,
      "step": 170
    },
    {
      "epoch": 0.0342,
      "grad_norm": 0.4627216160297394,
      "learning_rate": 3.4200000000000007e-06,
      "loss": 0.3405,
      "step": 171
    },
    {
      "epoch": 0.0344,
      "grad_norm": 0.3854435384273529,
      "learning_rate": 3.44e-06,
      "loss": 0.3433,
      "step": 172
    },
    {
      "epoch": 0.0346,
      "grad_norm": 0.4471338093280792,
      "learning_rate": 3.46e-06,
      "loss": 0.3939,
      "step": 173
    },
    {
      "epoch": 0.0348,
      "grad_norm": 0.46897152066230774,
      "learning_rate": 3.48e-06,
      "loss": 0.3316,
      "step": 174
    },
    {
      "epoch": 0.035,
      "grad_norm": 0.35544148087501526,
      "learning_rate": 3.5e-06,
      "loss": 0.3517,
      "step": 175
    },
    {
      "epoch": 0.0352,
      "grad_norm": 0.43114539980888367,
      "learning_rate": 3.52e-06,
      "loss": 0.373,
      "step": 176
    },
    {
      "epoch": 0.0354,
      "grad_norm": 0.5485230088233948,
      "learning_rate": 3.54e-06,
      "loss": 0.417,
      "step": 177
    },
    {
      "epoch": 0.0356,
      "grad_norm": 0.421191543340683,
      "learning_rate": 3.5600000000000002e-06,
      "loss": 0.34,
      "step": 178
    },
    {
      "epoch": 0.0358,
      "grad_norm": 0.41121935844421387,
      "learning_rate": 3.58e-06,
      "loss": 0.3962,
      "step": 179
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.45167702436447144,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.3618,
      "step": 180
    },
    {
      "epoch": 0.0362,
      "grad_norm": 0.5135020017623901,
      "learning_rate": 3.62e-06,
      "loss": 0.3641,
      "step": 181
    },
    {
      "epoch": 0.0364,
      "grad_norm": 0.4619881808757782,
      "learning_rate": 3.6400000000000003e-06,
      "loss": 0.3561,
      "step": 182
    },
    {
      "epoch": 0.0366,
      "grad_norm": 0.5459278225898743,
      "learning_rate": 3.66e-06,
      "loss": 0.3465,
      "step": 183
    },
    {
      "epoch": 0.0368,
      "grad_norm": 0.4173431098461151,
      "learning_rate": 3.6800000000000003e-06,
      "loss": 0.3919,
      "step": 184
    },
    {
      "epoch": 0.037,
      "grad_norm": 0.5145044326782227,
      "learning_rate": 3.7e-06,
      "loss": 0.387,
      "step": 185
    },
    {
      "epoch": 0.0372,
      "grad_norm": 0.4921259582042694,
      "learning_rate": 3.7200000000000004e-06,
      "loss": 0.3531,
      "step": 186
    },
    {
      "epoch": 0.0374,
      "grad_norm": 0.5367409586906433,
      "learning_rate": 3.74e-06,
      "loss": 0.3592,
      "step": 187
    },
    {
      "epoch": 0.0376,
      "grad_norm": 0.5271394848823547,
      "learning_rate": 3.7600000000000004e-06,
      "loss": 0.3671,
      "step": 188
    },
    {
      "epoch": 0.0378,
      "grad_norm": 0.6384382247924805,
      "learning_rate": 3.7800000000000002e-06,
      "loss": 0.3606,
      "step": 189
    },
    {
      "epoch": 0.038,
      "grad_norm": 1.0402482748031616,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 0.3685,
      "step": 190
    },
    {
      "epoch": 0.0382,
      "grad_norm": 0.4758528769016266,
      "learning_rate": 3.820000000000001e-06,
      "loss": 0.3906,
      "step": 191
    },
    {
      "epoch": 0.0384,
      "grad_norm": 1.0305895805358887,
      "learning_rate": 3.8400000000000005e-06,
      "loss": 0.3711,
      "step": 192
    },
    {
      "epoch": 0.0386,
      "grad_norm": 0.6315585374832153,
      "learning_rate": 3.86e-06,
      "loss": 0.4196,
      "step": 193
    },
    {
      "epoch": 0.0388,
      "grad_norm": 0.5562466382980347,
      "learning_rate": 3.88e-06,
      "loss": 0.353,
      "step": 194
    },
    {
      "epoch": 0.039,
      "grad_norm": 0.6188409328460693,
      "learning_rate": 3.900000000000001e-06,
      "loss": 0.3772,
      "step": 195
    },
    {
      "epoch": 0.0392,
      "grad_norm": 0.4420935809612274,
      "learning_rate": 3.920000000000001e-06,
      "loss": 0.3798,
      "step": 196
    },
    {
      "epoch": 0.0394,
      "grad_norm": 0.4730742573738098,
      "learning_rate": 3.94e-06,
      "loss": 0.3385,
      "step": 197
    },
    {
      "epoch": 0.0396,
      "grad_norm": 0.6343469023704529,
      "learning_rate": 3.96e-06,
      "loss": 0.3501,
      "step": 198
    },
    {
      "epoch": 0.0398,
      "grad_norm": 0.6453588008880615,
      "learning_rate": 3.980000000000001e-06,
      "loss": 0.4113,
      "step": 199
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.464709609746933,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.3799,
      "step": 200
    },
    {
      "epoch": 0.0402,
      "grad_norm": 0.5910764932632446,
      "learning_rate": 4.0200000000000005e-06,
      "loss": 0.3972,
      "step": 201
    },
    {
      "epoch": 0.0404,
      "grad_norm": 0.40358173847198486,
      "learning_rate": 4.04e-06,
      "loss": 0.3573,
      "step": 202
    },
    {
      "epoch": 0.0406,
      "grad_norm": 0.45070528984069824,
      "learning_rate": 4.060000000000001e-06,
      "loss": 0.3905,
      "step": 203
    },
    {
      "epoch": 0.0408,
      "grad_norm": 0.5609254837036133,
      "learning_rate": 4.08e-06,
      "loss": 0.35,
      "step": 204
    },
    {
      "epoch": 0.041,
      "grad_norm": 0.5488823652267456,
      "learning_rate": 4.1e-06,
      "loss": 0.3879,
      "step": 205
    },
    {
      "epoch": 0.0412,
      "grad_norm": 0.48123255372047424,
      "learning_rate": 4.12e-06,
      "loss": 0.3721,
      "step": 206
    },
    {
      "epoch": 0.0414,
      "grad_norm": 0.5321407914161682,
      "learning_rate": 4.14e-06,
      "loss": 0.3752,
      "step": 207
    },
    {
      "epoch": 0.0416,
      "grad_norm": 0.4854133427143097,
      "learning_rate": 4.16e-06,
      "loss": 0.3708,
      "step": 208
    },
    {
      "epoch": 0.0418,
      "grad_norm": 0.7217606902122498,
      "learning_rate": 4.18e-06,
      "loss": 0.3567,
      "step": 209
    },
    {
      "epoch": 0.042,
      "grad_norm": 0.5226067304611206,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.3927,
      "step": 210
    },
    {
      "epoch": 0.0422,
      "grad_norm": 0.6209088563919067,
      "learning_rate": 4.22e-06,
      "loss": 0.3817,
      "step": 211
    },
    {
      "epoch": 0.0424,
      "grad_norm": 0.4385822117328644,
      "learning_rate": 4.24e-06,
      "loss": 0.3455,
      "step": 212
    },
    {
      "epoch": 0.0426,
      "grad_norm": 0.4852634072303772,
      "learning_rate": 4.26e-06,
      "loss": 0.3972,
      "step": 213
    },
    {
      "epoch": 0.0428,
      "grad_norm": 0.4828781187534332,
      "learning_rate": 4.2800000000000005e-06,
      "loss": 0.3833,
      "step": 214
    },
    {
      "epoch": 0.043,
      "grad_norm": 0.4855740964412689,
      "learning_rate": 4.3e-06,
      "loss": 0.3628,
      "step": 215
    },
    {
      "epoch": 0.0432,
      "grad_norm": 0.44545137882232666,
      "learning_rate": 4.32e-06,
      "loss": 0.3464,
      "step": 216
    },
    {
      "epoch": 0.0434,
      "grad_norm": 0.4917391538619995,
      "learning_rate": 4.34e-06,
      "loss": 0.3609,
      "step": 217
    },
    {
      "epoch": 0.0436,
      "grad_norm": 0.5565835237503052,
      "learning_rate": 4.360000000000001e-06,
      "loss": 0.3758,
      "step": 218
    },
    {
      "epoch": 0.0438,
      "grad_norm": 0.5132595300674438,
      "learning_rate": 4.38e-06,
      "loss": 0.3485,
      "step": 219
    },
    {
      "epoch": 0.044,
      "grad_norm": 0.530337393283844,
      "learning_rate": 4.4e-06,
      "loss": 0.3587,
      "step": 220
    },
    {
      "epoch": 0.0442,
      "grad_norm": 0.5031636357307434,
      "learning_rate": 4.42e-06,
      "loss": 0.4087,
      "step": 221
    },
    {
      "epoch": 0.0444,
      "grad_norm": 1.1977548599243164,
      "learning_rate": 4.440000000000001e-06,
      "loss": 0.3853,
      "step": 222
    },
    {
      "epoch": 0.0446,
      "grad_norm": 2.114367723464966,
      "learning_rate": 4.4600000000000005e-06,
      "loss": 0.3628,
      "step": 223
    },
    {
      "epoch": 0.0448,
      "grad_norm": 0.5317151546478271,
      "learning_rate": 4.48e-06,
      "loss": 0.4122,
      "step": 224
    },
    {
      "epoch": 0.045,
      "grad_norm": 0.6733126044273376,
      "learning_rate": 4.5e-06,
      "loss": 0.4,
      "step": 225
    },
    {
      "epoch": 0.0452,
      "grad_norm": 0.5606313943862915,
      "learning_rate": 4.520000000000001e-06,
      "loss": 0.3882,
      "step": 226
    },
    {
      "epoch": 0.0454,
      "grad_norm": 0.5461177229881287,
      "learning_rate": 4.540000000000001e-06,
      "loss": 0.3704,
      "step": 227
    },
    {
      "epoch": 0.0456,
      "grad_norm": 0.5400316715240479,
      "learning_rate": 4.56e-06,
      "loss": 0.3573,
      "step": 228
    },
    {
      "epoch": 0.0458,
      "grad_norm": 0.7844945192337036,
      "learning_rate": 4.58e-06,
      "loss": 0.3888,
      "step": 229
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.5351366996765137,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.3387,
      "step": 230
    },
    {
      "epoch": 0.0462,
      "grad_norm": 0.5847746729850769,
      "learning_rate": 4.620000000000001e-06,
      "loss": 0.3843,
      "step": 231
    },
    {
      "epoch": 0.0464,
      "grad_norm": 0.49109357595443726,
      "learning_rate": 4.6400000000000005e-06,
      "loss": 0.3602,
      "step": 232
    },
    {
      "epoch": 0.0466,
      "grad_norm": 0.4675714075565338,
      "learning_rate": 4.66e-06,
      "loss": 0.356,
      "step": 233
    },
    {
      "epoch": 0.0468,
      "grad_norm": 0.5771036148071289,
      "learning_rate": 4.680000000000001e-06,
      "loss": 0.4025,
      "step": 234
    },
    {
      "epoch": 0.047,
      "grad_norm": 0.6511617302894592,
      "learning_rate": 4.7e-06,
      "loss": 0.3764,
      "step": 235
    },
    {
      "epoch": 0.0472,
      "grad_norm": 0.5542953610420227,
      "learning_rate": 4.7200000000000005e-06,
      "loss": 0.3887,
      "step": 236
    },
    {
      "epoch": 0.0474,
      "grad_norm": 0.5897404551506042,
      "learning_rate": 4.74e-06,
      "loss": 0.4009,
      "step": 237
    },
    {
      "epoch": 0.0476,
      "grad_norm": 0.48153606057167053,
      "learning_rate": 4.76e-06,
      "loss": 0.3269,
      "step": 238
    },
    {
      "epoch": 0.0478,
      "grad_norm": 0.46350377798080444,
      "learning_rate": 4.78e-06,
      "loss": 0.4011,
      "step": 239
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.4434587061405182,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.342,
      "step": 240
    },
    {
      "epoch": 0.0482,
      "grad_norm": 0.7888356447219849,
      "learning_rate": 4.8200000000000004e-06,
      "loss": 0.349,
      "step": 241
    },
    {
      "epoch": 0.0484,
      "grad_norm": 0.5427084565162659,
      "learning_rate": 4.84e-06,
      "loss": 0.3913,
      "step": 242
    },
    {
      "epoch": 0.0486,
      "grad_norm": 0.7143595218658447,
      "learning_rate": 4.86e-06,
      "loss": 0.389,
      "step": 243
    },
    {
      "epoch": 0.0488,
      "grad_norm": 0.510613203048706,
      "learning_rate": 4.880000000000001e-06,
      "loss": 0.3433,
      "step": 244
    },
    {
      "epoch": 0.049,
      "grad_norm": 0.47642844915390015,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 0.3582,
      "step": 245
    },
    {
      "epoch": 0.0492,
      "grad_norm": 0.7961475849151611,
      "learning_rate": 4.92e-06,
      "loss": 0.4021,
      "step": 246
    },
    {
      "epoch": 0.0494,
      "grad_norm": 0.5305927991867065,
      "learning_rate": 4.94e-06,
      "loss": 0.3863,
      "step": 247
    },
    {
      "epoch": 0.0496,
      "grad_norm": 0.685164749622345,
      "learning_rate": 4.960000000000001e-06,
      "loss": 0.3552,
      "step": 248
    },
    {
      "epoch": 0.0498,
      "grad_norm": 0.5103672742843628,
      "learning_rate": 4.980000000000001e-06,
      "loss": 0.3746,
      "step": 249
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.561657726764679,
      "learning_rate": 5e-06,
      "loss": 0.3869,
      "step": 250
    },
    {
      "epoch": 0.0502,
      "grad_norm": 0.450173556804657,
      "learning_rate": 5.02e-06,
      "loss": 0.3674,
      "step": 251
    },
    {
      "epoch": 0.0504,
      "grad_norm": 0.3885299265384674,
      "learning_rate": 5.04e-06,
      "loss": 0.339,
      "step": 252
    },
    {
      "epoch": 0.0506,
      "grad_norm": 0.47995996475219727,
      "learning_rate": 5.060000000000001e-06,
      "loss": 0.3924,
      "step": 253
    },
    {
      "epoch": 0.0508,
      "grad_norm": 0.5391303896903992,
      "learning_rate": 5.0800000000000005e-06,
      "loss": 0.3968,
      "step": 254
    },
    {
      "epoch": 0.051,
      "grad_norm": 0.4845463037490845,
      "learning_rate": 5.1e-06,
      "loss": 0.3603,
      "step": 255
    },
    {
      "epoch": 0.0512,
      "grad_norm": 0.469383180141449,
      "learning_rate": 5.12e-06,
      "loss": 0.3614,
      "step": 256
    },
    {
      "epoch": 0.0514,
      "grad_norm": 0.4680943787097931,
      "learning_rate": 5.140000000000001e-06,
      "loss": 0.3832,
      "step": 257
    },
    {
      "epoch": 0.0516,
      "grad_norm": 0.48814791440963745,
      "learning_rate": 5.1600000000000006e-06,
      "loss": 0.3887,
      "step": 258
    },
    {
      "epoch": 0.0518,
      "grad_norm": 0.5372050404548645,
      "learning_rate": 5.18e-06,
      "loss": 0.3717,
      "step": 259
    },
    {
      "epoch": 0.052,
      "grad_norm": 0.4863547682762146,
      "learning_rate": 5.2e-06,
      "loss": 0.3712,
      "step": 260
    },
    {
      "epoch": 0.0522,
      "grad_norm": 0.43458011746406555,
      "learning_rate": 5.220000000000001e-06,
      "loss": 0.3416,
      "step": 261
    },
    {
      "epoch": 0.0524,
      "grad_norm": 0.5313823223114014,
      "learning_rate": 5.240000000000001e-06,
      "loss": 0.3346,
      "step": 262
    },
    {
      "epoch": 0.0526,
      "grad_norm": 0.48250845074653625,
      "learning_rate": 5.2600000000000005e-06,
      "loss": 0.3623,
      "step": 263
    },
    {
      "epoch": 0.0528,
      "grad_norm": 0.843022882938385,
      "learning_rate": 5.28e-06,
      "loss": 0.4019,
      "step": 264
    },
    {
      "epoch": 0.053,
      "grad_norm": 0.493007093667984,
      "learning_rate": 5.300000000000001e-06,
      "loss": 0.3379,
      "step": 265
    },
    {
      "epoch": 0.0532,
      "grad_norm": 0.4395906329154968,
      "learning_rate": 5.320000000000001e-06,
      "loss": 0.3792,
      "step": 266
    },
    {
      "epoch": 0.0534,
      "grad_norm": 0.4727928340435028,
      "learning_rate": 5.3400000000000005e-06,
      "loss": 0.3746,
      "step": 267
    },
    {
      "epoch": 0.0536,
      "grad_norm": 0.4456286132335663,
      "learning_rate": 5.36e-06,
      "loss": 0.3033,
      "step": 268
    },
    {
      "epoch": 0.0538,
      "grad_norm": 0.47290152311325073,
      "learning_rate": 5.380000000000001e-06,
      "loss": 0.3473,
      "step": 269
    },
    {
      "epoch": 0.054,
      "grad_norm": 0.48750656843185425,
      "learning_rate": 5.400000000000001e-06,
      "loss": 0.348,
      "step": 270
    },
    {
      "epoch": 0.0542,
      "grad_norm": 0.8925968408584595,
      "learning_rate": 5.420000000000001e-06,
      "loss": 0.3736,
      "step": 271
    },
    {
      "epoch": 0.0544,
      "grad_norm": 0.5085983276367188,
      "learning_rate": 5.4400000000000004e-06,
      "loss": 0.3582,
      "step": 272
    },
    {
      "epoch": 0.0546,
      "grad_norm": 0.4193674921989441,
      "learning_rate": 5.460000000000001e-06,
      "loss": 0.3963,
      "step": 273
    },
    {
      "epoch": 0.0548,
      "grad_norm": 0.7106339335441589,
      "learning_rate": 5.480000000000001e-06,
      "loss": 0.3498,
      "step": 274
    },
    {
      "epoch": 0.055,
      "grad_norm": 0.7661458253860474,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.4031,
      "step": 275
    },
    {
      "epoch": 0.0552,
      "grad_norm": 0.4293787479400635,
      "learning_rate": 5.5200000000000005e-06,
      "loss": 0.3322,
      "step": 276
    },
    {
      "epoch": 0.0554,
      "grad_norm": 0.5811620950698853,
      "learning_rate": 5.540000000000001e-06,
      "loss": 0.339,
      "step": 277
    },
    {
      "epoch": 0.0556,
      "grad_norm": 0.4854693114757538,
      "learning_rate": 5.560000000000001e-06,
      "loss": 0.342,
      "step": 278
    },
    {
      "epoch": 0.0558,
      "grad_norm": 0.7552838325500488,
      "learning_rate": 5.580000000000001e-06,
      "loss": 0.3541,
      "step": 279
    },
    {
      "epoch": 0.056,
      "grad_norm": 1.169421672821045,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.3816,
      "step": 280
    },
    {
      "epoch": 0.0562,
      "grad_norm": 4.104032516479492,
      "learning_rate": 5.620000000000001e-06,
      "loss": 0.3529,
      "step": 281
    },
    {
      "epoch": 0.0564,
      "grad_norm": 0.49185436964035034,
      "learning_rate": 5.64e-06,
      "loss": 0.3397,
      "step": 282
    },
    {
      "epoch": 0.0566,
      "grad_norm": 0.42205214500427246,
      "learning_rate": 5.66e-06,
      "loss": 0.3817,
      "step": 283
    },
    {
      "epoch": 0.0568,
      "grad_norm": 0.4949096739292145,
      "learning_rate": 5.68e-06,
      "loss": 0.3581,
      "step": 284
    },
    {
      "epoch": 0.057,
      "grad_norm": 0.4883720576763153,
      "learning_rate": 5.7e-06,
      "loss": 0.3272,
      "step": 285
    },
    {
      "epoch": 0.0572,
      "grad_norm": 0.8542705774307251,
      "learning_rate": 5.72e-06,
      "loss": 0.3784,
      "step": 286
    },
    {
      "epoch": 0.0574,
      "grad_norm": 0.8221933841705322,
      "learning_rate": 5.74e-06,
      "loss": 0.3557,
      "step": 287
    },
    {
      "epoch": 0.0576,
      "grad_norm": 0.614643931388855,
      "learning_rate": 5.76e-06,
      "loss": 0.3826,
      "step": 288
    },
    {
      "epoch": 0.0578,
      "grad_norm": 0.5977184176445007,
      "learning_rate": 5.78e-06,
      "loss": 0.3673,
      "step": 289
    },
    {
      "epoch": 0.058,
      "grad_norm": 0.6216490864753723,
      "learning_rate": 5.8e-06,
      "loss": 0.4292,
      "step": 290
    },
    {
      "epoch": 0.0582,
      "grad_norm": 0.46331077814102173,
      "learning_rate": 5.82e-06,
      "loss": 0.3666,
      "step": 291
    },
    {
      "epoch": 0.0584,
      "grad_norm": 0.499988317489624,
      "learning_rate": 5.84e-06,
      "loss": 0.3641,
      "step": 292
    },
    {
      "epoch": 0.0586,
      "grad_norm": 0.5104328989982605,
      "learning_rate": 5.86e-06,
      "loss": 0.3741,
      "step": 293
    },
    {
      "epoch": 0.0588,
      "grad_norm": 0.5588429570198059,
      "learning_rate": 5.8800000000000005e-06,
      "loss": 0.3896,
      "step": 294
    },
    {
      "epoch": 0.059,
      "grad_norm": 0.48014357686042786,
      "learning_rate": 5.9e-06,
      "loss": 0.345,
      "step": 295
    },
    {
      "epoch": 0.0592,
      "grad_norm": 0.37337514758110046,
      "learning_rate": 5.92e-06,
      "loss": 0.3188,
      "step": 296
    },
    {
      "epoch": 0.0594,
      "grad_norm": 0.5023643374443054,
      "learning_rate": 5.94e-06,
      "loss": 0.3925,
      "step": 297
    },
    {
      "epoch": 0.0596,
      "grad_norm": 0.5904903411865234,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 0.3751,
      "step": 298
    },
    {
      "epoch": 0.0598,
      "grad_norm": 0.4945261478424072,
      "learning_rate": 5.98e-06,
      "loss": 0.3408,
      "step": 299
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.46068504452705383,
      "learning_rate": 6e-06,
      "loss": 0.4142,
      "step": 300
    },
    {
      "epoch": 0.0602,
      "grad_norm": 0.5857117772102356,
      "learning_rate": 6.02e-06,
      "loss": 0.3697,
      "step": 301
    },
    {
      "epoch": 0.0604,
      "grad_norm": 0.48937422037124634,
      "learning_rate": 6.040000000000001e-06,
      "loss": 0.3984,
      "step": 302
    },
    {
      "epoch": 0.0606,
      "grad_norm": 0.4506399929523468,
      "learning_rate": 6.0600000000000004e-06,
      "loss": 0.3589,
      "step": 303
    },
    {
      "epoch": 0.0608,
      "grad_norm": 0.5338152050971985,
      "learning_rate": 6.08e-06,
      "loss": 0.3435,
      "step": 304
    },
    {
      "epoch": 0.061,
      "grad_norm": 0.5369296669960022,
      "learning_rate": 6.1e-06,
      "loss": 0.357,
      "step": 305
    },
    {
      "epoch": 0.0612,
      "grad_norm": 0.44453251361846924,
      "learning_rate": 6.120000000000001e-06,
      "loss": 0.3694,
      "step": 306
    },
    {
      "epoch": 0.0614,
      "grad_norm": 0.47694671154022217,
      "learning_rate": 6.1400000000000005e-06,
      "loss": 0.3645,
      "step": 307
    },
    {
      "epoch": 0.0616,
      "grad_norm": 1.1780699491500854,
      "learning_rate": 6.16e-06,
      "loss": 0.3251,
      "step": 308
    },
    {
      "epoch": 0.0618,
      "grad_norm": 0.770831823348999,
      "learning_rate": 6.18e-06,
      "loss": 0.3992,
      "step": 309
    },
    {
      "epoch": 0.062,
      "grad_norm": 0.49005967378616333,
      "learning_rate": 6.200000000000001e-06,
      "loss": 0.3858,
      "step": 310
    },
    {
      "epoch": 0.0622,
      "grad_norm": 0.5764769315719604,
      "learning_rate": 6.220000000000001e-06,
      "loss": 0.3993,
      "step": 311
    },
    {
      "epoch": 0.0624,
      "grad_norm": 0.4973750412464142,
      "learning_rate": 6.24e-06,
      "loss": 0.3528,
      "step": 312
    },
    {
      "epoch": 0.0626,
      "grad_norm": 0.7890161275863647,
      "learning_rate": 6.26e-06,
      "loss": 0.3418,
      "step": 313
    },
    {
      "epoch": 0.0628,
      "grad_norm": 0.49862977862358093,
      "learning_rate": 6.280000000000001e-06,
      "loss": 0.3942,
      "step": 314
    },
    {
      "epoch": 0.063,
      "grad_norm": 0.45435455441474915,
      "learning_rate": 6.300000000000001e-06,
      "loss": 0.3808,
      "step": 315
    },
    {
      "epoch": 0.0632,
      "grad_norm": 0.5637235641479492,
      "learning_rate": 6.3200000000000005e-06,
      "loss": 0.3787,
      "step": 316
    },
    {
      "epoch": 0.0634,
      "grad_norm": 0.5251534581184387,
      "learning_rate": 6.34e-06,
      "loss": 0.3611,
      "step": 317
    },
    {
      "epoch": 0.0636,
      "grad_norm": 0.6036379337310791,
      "learning_rate": 6.360000000000001e-06,
      "loss": 0.3631,
      "step": 318
    },
    {
      "epoch": 0.0638,
      "grad_norm": 0.41473501920700073,
      "learning_rate": 6.380000000000001e-06,
      "loss": 0.354,
      "step": 319
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.5441268086433411,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.362,
      "step": 320
    },
    {
      "epoch": 0.0642,
      "grad_norm": 0.5704363584518433,
      "learning_rate": 6.42e-06,
      "loss": 0.3835,
      "step": 321
    },
    {
      "epoch": 0.0644,
      "grad_norm": 0.5045521259307861,
      "learning_rate": 6.440000000000001e-06,
      "loss": 0.3453,
      "step": 322
    },
    {
      "epoch": 0.0646,
      "grad_norm": 0.5521782040596008,
      "learning_rate": 6.460000000000001e-06,
      "loss": 0.4002,
      "step": 323
    },
    {
      "epoch": 0.0648,
      "grad_norm": 0.50490802526474,
      "learning_rate": 6.480000000000001e-06,
      "loss": 0.382,
      "step": 324
    },
    {
      "epoch": 0.065,
      "grad_norm": 0.4222056567668915,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.3506,
      "step": 325
    },
    {
      "epoch": 0.0652,
      "grad_norm": 0.5234901309013367,
      "learning_rate": 6.520000000000001e-06,
      "loss": 0.3902,
      "step": 326
    },
    {
      "epoch": 0.0654,
      "grad_norm": 0.5196935534477234,
      "learning_rate": 6.540000000000001e-06,
      "loss": 0.3601,
      "step": 327
    },
    {
      "epoch": 0.0656,
      "grad_norm": 0.5383853316307068,
      "learning_rate": 6.560000000000001e-06,
      "loss": 0.3843,
      "step": 328
    },
    {
      "epoch": 0.0658,
      "grad_norm": 0.5251453518867493,
      "learning_rate": 6.5800000000000005e-06,
      "loss": 0.3451,
      "step": 329
    },
    {
      "epoch": 0.066,
      "grad_norm": 0.5208432674407959,
      "learning_rate": 6.600000000000001e-06,
      "loss": 0.3858,
      "step": 330
    },
    {
      "epoch": 0.0662,
      "grad_norm": 0.6581575870513916,
      "learning_rate": 6.620000000000001e-06,
      "loss": 0.3288,
      "step": 331
    },
    {
      "epoch": 0.0664,
      "grad_norm": 0.5253073573112488,
      "learning_rate": 6.640000000000001e-06,
      "loss": 0.3837,
      "step": 332
    },
    {
      "epoch": 0.0666,
      "grad_norm": 1.3243104219436646,
      "learning_rate": 6.660000000000001e-06,
      "loss": 0.3645,
      "step": 333
    },
    {
      "epoch": 0.0668,
      "grad_norm": 0.5156260132789612,
      "learning_rate": 6.680000000000001e-06,
      "loss": 0.3482,
      "step": 334
    },
    {
      "epoch": 0.067,
      "grad_norm": 0.5392702221870422,
      "learning_rate": 6.700000000000001e-06,
      "loss": 0.3852,
      "step": 335
    },
    {
      "epoch": 0.0672,
      "grad_norm": 0.543277382850647,
      "learning_rate": 6.720000000000001e-06,
      "loss": 0.3143,
      "step": 336
    },
    {
      "epoch": 0.0674,
      "grad_norm": 0.4807863235473633,
      "learning_rate": 6.740000000000001e-06,
      "loss": 0.3493,
      "step": 337
    },
    {
      "epoch": 0.0676,
      "grad_norm": 0.5871647000312805,
      "learning_rate": 6.760000000000001e-06,
      "loss": 0.3946,
      "step": 338
    },
    {
      "epoch": 0.0678,
      "grad_norm": 0.6702607274055481,
      "learning_rate": 6.780000000000001e-06,
      "loss": 0.3501,
      "step": 339
    },
    {
      "epoch": 0.068,
      "grad_norm": 0.42674872279167175,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.3505,
      "step": 340
    },
    {
      "epoch": 0.0682,
      "grad_norm": 1.036482572555542,
      "learning_rate": 6.820000000000001e-06,
      "loss": 0.3756,
      "step": 341
    },
    {
      "epoch": 0.0684,
      "grad_norm": 0.5130389928817749,
      "learning_rate": 6.8400000000000014e-06,
      "loss": 0.3424,
      "step": 342
    },
    {
      "epoch": 0.0686,
      "grad_norm": 0.46081650257110596,
      "learning_rate": 6.860000000000001e-06,
      "loss": 0.3748,
      "step": 343
    },
    {
      "epoch": 0.0688,
      "grad_norm": 0.6402235627174377,
      "learning_rate": 6.88e-06,
      "loss": 0.3982,
      "step": 344
    },
    {
      "epoch": 0.069,
      "grad_norm": 0.5077682733535767,
      "learning_rate": 6.9e-06,
      "loss": 0.3627,
      "step": 345
    },
    {
      "epoch": 0.0692,
      "grad_norm": 0.5027735829353333,
      "learning_rate": 6.92e-06,
      "loss": 0.3715,
      "step": 346
    },
    {
      "epoch": 0.0694,
      "grad_norm": 0.5081953406333923,
      "learning_rate": 6.9400000000000005e-06,
      "loss": 0.3488,
      "step": 347
    },
    {
      "epoch": 0.0696,
      "grad_norm": 0.549768328666687,
      "learning_rate": 6.96e-06,
      "loss": 0.3862,
      "step": 348
    },
    {
      "epoch": 0.0698,
      "grad_norm": 0.5405619740486145,
      "learning_rate": 6.98e-06,
      "loss": 0.3897,
      "step": 349
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5158597826957703,
      "learning_rate": 7e-06,
      "loss": 0.3475,
      "step": 350
    },
    {
      "epoch": 0.0702,
      "grad_norm": 2.6695237159729004,
      "learning_rate": 7.0200000000000006e-06,
      "loss": 0.3817,
      "step": 351
    },
    {
      "epoch": 0.0704,
      "grad_norm": 0.5735749006271362,
      "learning_rate": 7.04e-06,
      "loss": 0.3403,
      "step": 352
    },
    {
      "epoch": 0.0706,
      "grad_norm": 0.48250052332878113,
      "learning_rate": 7.06e-06,
      "loss": 0.341,
      "step": 353
    },
    {
      "epoch": 0.0708,
      "grad_norm": 0.5216788053512573,
      "learning_rate": 7.08e-06,
      "loss": 0.3706,
      "step": 354
    },
    {
      "epoch": 0.071,
      "grad_norm": 0.6020796895027161,
      "learning_rate": 7.100000000000001e-06,
      "loss": 0.3504,
      "step": 355
    },
    {
      "epoch": 0.0712,
      "grad_norm": 0.5250270962715149,
      "learning_rate": 7.1200000000000004e-06,
      "loss": 0.3615,
      "step": 356
    },
    {
      "epoch": 0.0714,
      "grad_norm": 0.884941041469574,
      "learning_rate": 7.14e-06,
      "loss": 0.3505,
      "step": 357
    },
    {
      "epoch": 0.0716,
      "grad_norm": 0.44191989302635193,
      "learning_rate": 7.16e-06,
      "loss": 0.3397,
      "step": 358
    },
    {
      "epoch": 0.0718,
      "grad_norm": 0.5041542053222656,
      "learning_rate": 7.180000000000001e-06,
      "loss": 0.36,
      "step": 359
    },
    {
      "epoch": 0.072,
      "grad_norm": 1.16036856174469,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.3736,
      "step": 360
    },
    {
      "epoch": 0.0722,
      "grad_norm": 0.49549126625061035,
      "learning_rate": 7.22e-06,
      "loss": 0.3535,
      "step": 361
    },
    {
      "epoch": 0.0724,
      "grad_norm": 0.6692564487457275,
      "learning_rate": 7.24e-06,
      "loss": 0.3528,
      "step": 362
    },
    {
      "epoch": 0.0726,
      "grad_norm": 0.6063247323036194,
      "learning_rate": 7.260000000000001e-06,
      "loss": 0.3516,
      "step": 363
    },
    {
      "epoch": 0.0728,
      "grad_norm": 0.5167468190193176,
      "learning_rate": 7.280000000000001e-06,
      "loss": 0.3427,
      "step": 364
    },
    {
      "epoch": 0.073,
      "grad_norm": 0.5150909423828125,
      "learning_rate": 7.3e-06,
      "loss": 0.3463,
      "step": 365
    },
    {
      "epoch": 0.0732,
      "grad_norm": 0.48887062072753906,
      "learning_rate": 7.32e-06,
      "loss": 0.3812,
      "step": 366
    },
    {
      "epoch": 0.0734,
      "grad_norm": 0.5032296180725098,
      "learning_rate": 7.340000000000001e-06,
      "loss": 0.3766,
      "step": 367
    },
    {
      "epoch": 0.0736,
      "grad_norm": 0.5447237491607666,
      "learning_rate": 7.360000000000001e-06,
      "loss": 0.356,
      "step": 368
    },
    {
      "epoch": 0.0738,
      "grad_norm": 0.9454934000968933,
      "learning_rate": 7.3800000000000005e-06,
      "loss": 0.3867,
      "step": 369
    },
    {
      "epoch": 0.074,
      "grad_norm": 0.6908373832702637,
      "learning_rate": 7.4e-06,
      "loss": 0.3869,
      "step": 370
    },
    {
      "epoch": 0.0742,
      "grad_norm": 0.546795666217804,
      "learning_rate": 7.420000000000001e-06,
      "loss": 0.3731,
      "step": 371
    },
    {
      "epoch": 0.0744,
      "grad_norm": 0.6124436855316162,
      "learning_rate": 7.440000000000001e-06,
      "loss": 0.3613,
      "step": 372
    },
    {
      "epoch": 0.0746,
      "grad_norm": 0.4887937605381012,
      "learning_rate": 7.4600000000000006e-06,
      "loss": 0.3613,
      "step": 373
    },
    {
      "epoch": 0.0748,
      "grad_norm": 0.6965321898460388,
      "learning_rate": 7.48e-06,
      "loss": 0.3623,
      "step": 374
    },
    {
      "epoch": 0.075,
      "grad_norm": 0.5320557951927185,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.3514,
      "step": 375
    },
    {
      "epoch": 0.0752,
      "grad_norm": 0.46270841360092163,
      "learning_rate": 7.520000000000001e-06,
      "loss": 0.3239,
      "step": 376
    },
    {
      "epoch": 0.0754,
      "grad_norm": 0.5304298996925354,
      "learning_rate": 7.540000000000001e-06,
      "loss": 0.3533,
      "step": 377
    },
    {
      "epoch": 0.0756,
      "grad_norm": 0.4981796443462372,
      "learning_rate": 7.5600000000000005e-06,
      "loss": 0.3545,
      "step": 378
    },
    {
      "epoch": 0.0758,
      "grad_norm": 0.4829084873199463,
      "learning_rate": 7.58e-06,
      "loss": 0.3675,
      "step": 379
    },
    {
      "epoch": 0.076,
      "grad_norm": 0.5086835622787476,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.3413,
      "step": 380
    },
    {
      "epoch": 0.0762,
      "grad_norm": 0.5108396410942078,
      "learning_rate": 7.620000000000001e-06,
      "loss": 0.3417,
      "step": 381
    },
    {
      "epoch": 0.0764,
      "grad_norm": 0.4282059073448181,
      "learning_rate": 7.640000000000001e-06,
      "loss": 0.3496,
      "step": 382
    },
    {
      "epoch": 0.0766,
      "grad_norm": 0.4752441644668579,
      "learning_rate": 7.660000000000001e-06,
      "loss": 0.3648,
      "step": 383
    },
    {
      "epoch": 0.0768,
      "grad_norm": 0.5616829991340637,
      "learning_rate": 7.680000000000001e-06,
      "loss": 0.3601,
      "step": 384
    },
    {
      "epoch": 0.077,
      "grad_norm": 0.5348000526428223,
      "learning_rate": 7.7e-06,
      "loss": 0.3601,
      "step": 385
    },
    {
      "epoch": 0.0772,
      "grad_norm": 0.6081492304801941,
      "learning_rate": 7.72e-06,
      "loss": 0.3869,
      "step": 386
    },
    {
      "epoch": 0.0774,
      "grad_norm": 0.554927408695221,
      "learning_rate": 7.74e-06,
      "loss": 0.3605,
      "step": 387
    },
    {
      "epoch": 0.0776,
      "grad_norm": 0.6582492589950562,
      "learning_rate": 7.76e-06,
      "loss": 0.3763,
      "step": 388
    },
    {
      "epoch": 0.0778,
      "grad_norm": 0.7177427411079407,
      "learning_rate": 7.78e-06,
      "loss": 0.3642,
      "step": 389
    },
    {
      "epoch": 0.078,
      "grad_norm": 0.5401477813720703,
      "learning_rate": 7.800000000000002e-06,
      "loss": 0.3687,
      "step": 390
    },
    {
      "epoch": 0.0782,
      "grad_norm": 0.671538770198822,
      "learning_rate": 7.820000000000001e-06,
      "loss": 0.3804,
      "step": 391
    },
    {
      "epoch": 0.0784,
      "grad_norm": 0.6063227653503418,
      "learning_rate": 7.840000000000001e-06,
      "loss": 0.3402,
      "step": 392
    },
    {
      "epoch": 0.0786,
      "grad_norm": 0.6349709630012512,
      "learning_rate": 7.860000000000001e-06,
      "loss": 0.3571,
      "step": 393
    },
    {
      "epoch": 0.0788,
      "grad_norm": 0.7782562971115112,
      "learning_rate": 7.88e-06,
      "loss": 0.336,
      "step": 394
    },
    {
      "epoch": 0.079,
      "grad_norm": 0.7077436447143555,
      "learning_rate": 7.9e-06,
      "loss": 0.3675,
      "step": 395
    },
    {
      "epoch": 0.0792,
      "grad_norm": 0.6595727205276489,
      "learning_rate": 7.92e-06,
      "loss": 0.3608,
      "step": 396
    },
    {
      "epoch": 0.0794,
      "grad_norm": 0.527309238910675,
      "learning_rate": 7.94e-06,
      "loss": 0.3547,
      "step": 397
    },
    {
      "epoch": 0.0796,
      "grad_norm": 0.5047811269760132,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.3674,
      "step": 398
    },
    {
      "epoch": 0.0798,
      "grad_norm": 0.5931805372238159,
      "learning_rate": 7.980000000000002e-06,
      "loss": 0.3642,
      "step": 399
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.484808087348938,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.353,
      "step": 400
    },
    {
      "epoch": 0.0802,
      "grad_norm": 0.5031731128692627,
      "learning_rate": 8.020000000000001e-06,
      "loss": 0.3524,
      "step": 401
    },
    {
      "epoch": 0.0804,
      "grad_norm": 0.5104628205299377,
      "learning_rate": 8.040000000000001e-06,
      "loss": 0.317,
      "step": 402
    },
    {
      "epoch": 0.0806,
      "grad_norm": 0.4780406951904297,
      "learning_rate": 8.06e-06,
      "loss": 0.3375,
      "step": 403
    },
    {
      "epoch": 0.0808,
      "grad_norm": 0.5637823939323425,
      "learning_rate": 8.08e-06,
      "loss": 0.3596,
      "step": 404
    },
    {
      "epoch": 0.081,
      "grad_norm": 0.4751672148704529,
      "learning_rate": 8.1e-06,
      "loss": 0.3219,
      "step": 405
    },
    {
      "epoch": 0.0812,
      "grad_norm": 0.6096263527870178,
      "learning_rate": 8.120000000000002e-06,
      "loss": 0.3519,
      "step": 406
    },
    {
      "epoch": 0.0814,
      "grad_norm": 0.443376749753952,
      "learning_rate": 8.14e-06,
      "loss": 0.3446,
      "step": 407
    },
    {
      "epoch": 0.0816,
      "grad_norm": 0.6747735738754272,
      "learning_rate": 8.16e-06,
      "loss": 0.3659,
      "step": 408
    },
    {
      "epoch": 0.0818,
      "grad_norm": 0.43160316348075867,
      "learning_rate": 8.18e-06,
      "loss": 0.3654,
      "step": 409
    },
    {
      "epoch": 0.082,
      "grad_norm": 0.9620841145515442,
      "learning_rate": 8.2e-06,
      "loss": 0.3591,
      "step": 410
    },
    {
      "epoch": 0.0822,
      "grad_norm": 0.5405911803245544,
      "learning_rate": 8.220000000000001e-06,
      "loss": 0.3992,
      "step": 411
    },
    {
      "epoch": 0.0824,
      "grad_norm": 0.4966471791267395,
      "learning_rate": 8.24e-06,
      "loss": 0.3658,
      "step": 412
    },
    {
      "epoch": 0.0826,
      "grad_norm": 0.616311252117157,
      "learning_rate": 8.26e-06,
      "loss": 0.3658,
      "step": 413
    },
    {
      "epoch": 0.0828,
      "grad_norm": 0.4911673963069916,
      "learning_rate": 8.28e-06,
      "loss": 0.3933,
      "step": 414
    },
    {
      "epoch": 0.083,
      "grad_norm": 0.4810709059238434,
      "learning_rate": 8.3e-06,
      "loss": 0.3566,
      "step": 415
    },
    {
      "epoch": 0.0832,
      "grad_norm": 0.5234078764915466,
      "learning_rate": 8.32e-06,
      "loss": 0.3809,
      "step": 416
    },
    {
      "epoch": 0.0834,
      "grad_norm": 0.6064195036888123,
      "learning_rate": 8.34e-06,
      "loss": 0.3659,
      "step": 417
    },
    {
      "epoch": 0.0836,
      "grad_norm": 0.4819906949996948,
      "learning_rate": 8.36e-06,
      "loss": 0.3463,
      "step": 418
    },
    {
      "epoch": 0.0838,
      "grad_norm": 0.5467087626457214,
      "learning_rate": 8.380000000000001e-06,
      "loss": 0.386,
      "step": 419
    },
    {
      "epoch": 0.084,
      "grad_norm": 0.6791503429412842,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.3364,
      "step": 420
    },
    {
      "epoch": 0.0842,
      "grad_norm": 0.5661970973014832,
      "learning_rate": 8.42e-06,
      "loss": 0.324,
      "step": 421
    },
    {
      "epoch": 0.0844,
      "grad_norm": 0.7056400775909424,
      "learning_rate": 8.44e-06,
      "loss": 0.3654,
      "step": 422
    },
    {
      "epoch": 0.0846,
      "grad_norm": 0.568681538105011,
      "learning_rate": 8.46e-06,
      "loss": 0.3813,
      "step": 423
    },
    {
      "epoch": 0.0848,
      "grad_norm": 0.5368359088897705,
      "learning_rate": 8.48e-06,
      "loss": 0.3851,
      "step": 424
    },
    {
      "epoch": 0.085,
      "grad_norm": 0.4438701272010803,
      "learning_rate": 8.5e-06,
      "loss": 0.3613,
      "step": 425
    },
    {
      "epoch": 0.0852,
      "grad_norm": 0.556397557258606,
      "learning_rate": 8.52e-06,
      "loss": 0.3599,
      "step": 426
    },
    {
      "epoch": 0.0854,
      "grad_norm": 0.5753066539764404,
      "learning_rate": 8.540000000000001e-06,
      "loss": 0.3573,
      "step": 427
    },
    {
      "epoch": 0.0856,
      "grad_norm": 0.4863491654396057,
      "learning_rate": 8.560000000000001e-06,
      "loss": 0.3423,
      "step": 428
    },
    {
      "epoch": 0.0858,
      "grad_norm": 0.552019476890564,
      "learning_rate": 8.580000000000001e-06,
      "loss": 0.3842,
      "step": 429
    },
    {
      "epoch": 0.086,
      "grad_norm": 0.5308921337127686,
      "learning_rate": 8.6e-06,
      "loss": 0.381,
      "step": 430
    },
    {
      "epoch": 0.0862,
      "grad_norm": 0.5407894849777222,
      "learning_rate": 8.62e-06,
      "loss": 0.4099,
      "step": 431
    },
    {
      "epoch": 0.0864,
      "grad_norm": 0.5364815592765808,
      "learning_rate": 8.64e-06,
      "loss": 0.3894,
      "step": 432
    },
    {
      "epoch": 0.0866,
      "grad_norm": 0.5563845038414001,
      "learning_rate": 8.66e-06,
      "loss": 0.3497,
      "step": 433
    },
    {
      "epoch": 0.0868,
      "grad_norm": 0.5083327293395996,
      "learning_rate": 8.68e-06,
      "loss": 0.3444,
      "step": 434
    },
    {
      "epoch": 0.087,
      "grad_norm": 0.5034142732620239,
      "learning_rate": 8.700000000000001e-06,
      "loss": 0.3895,
      "step": 435
    },
    {
      "epoch": 0.0872,
      "grad_norm": 0.5671902298927307,
      "learning_rate": 8.720000000000001e-06,
      "loss": 0.401,
      "step": 436
    },
    {
      "epoch": 0.0874,
      "grad_norm": 0.6155437231063843,
      "learning_rate": 8.740000000000001e-06,
      "loss": 0.371,
      "step": 437
    },
    {
      "epoch": 0.0876,
      "grad_norm": 0.6367061138153076,
      "learning_rate": 8.76e-06,
      "loss": 0.3678,
      "step": 438
    },
    {
      "epoch": 0.0878,
      "grad_norm": 0.686025857925415,
      "learning_rate": 8.78e-06,
      "loss": 0.3611,
      "step": 439
    },
    {
      "epoch": 0.088,
      "grad_norm": 0.7167381048202515,
      "learning_rate": 8.8e-06,
      "loss": 0.4024,
      "step": 440
    },
    {
      "epoch": 0.0882,
      "grad_norm": 0.513490617275238,
      "learning_rate": 8.82e-06,
      "loss": 0.3977,
      "step": 441
    },
    {
      "epoch": 0.0884,
      "grad_norm": 0.511195719242096,
      "learning_rate": 8.84e-06,
      "loss": 0.3569,
      "step": 442
    },
    {
      "epoch": 0.0886,
      "grad_norm": 0.5956668853759766,
      "learning_rate": 8.860000000000002e-06,
      "loss": 0.3459,
      "step": 443
    },
    {
      "epoch": 0.0888,
      "grad_norm": 0.49588754773139954,
      "learning_rate": 8.880000000000001e-06,
      "loss": 0.3404,
      "step": 444
    },
    {
      "epoch": 0.089,
      "grad_norm": 0.6357727646827698,
      "learning_rate": 8.900000000000001e-06,
      "loss": 0.3631,
      "step": 445
    },
    {
      "epoch": 0.0892,
      "grad_norm": 0.5857269167900085,
      "learning_rate": 8.920000000000001e-06,
      "loss": 0.3413,
      "step": 446
    },
    {
      "epoch": 0.0894,
      "grad_norm": 0.6178385615348816,
      "learning_rate": 8.94e-06,
      "loss": 0.359,
      "step": 447
    },
    {
      "epoch": 0.0896,
      "grad_norm": 0.4877795875072479,
      "learning_rate": 8.96e-06,
      "loss": 0.3657,
      "step": 448
    },
    {
      "epoch": 0.0898,
      "grad_norm": 0.4381040930747986,
      "learning_rate": 8.98e-06,
      "loss": 0.3396,
      "step": 449
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5485188961029053,
      "learning_rate": 9e-06,
      "loss": 0.3656,
      "step": 450
    },
    {
      "epoch": 0.0902,
      "grad_norm": 0.4523361623287201,
      "learning_rate": 9.020000000000002e-06,
      "loss": 0.344,
      "step": 451
    },
    {
      "epoch": 0.0904,
      "grad_norm": 0.501225471496582,
      "learning_rate": 9.040000000000002e-06,
      "loss": 0.341,
      "step": 452
    },
    {
      "epoch": 0.0906,
      "grad_norm": 0.7553176879882812,
      "learning_rate": 9.060000000000001e-06,
      "loss": 0.362,
      "step": 453
    },
    {
      "epoch": 0.0908,
      "grad_norm": 0.4828166961669922,
      "learning_rate": 9.080000000000001e-06,
      "loss": 0.3373,
      "step": 454
    },
    {
      "epoch": 0.091,
      "grad_norm": 0.45987793803215027,
      "learning_rate": 9.100000000000001e-06,
      "loss": 0.3256,
      "step": 455
    },
    {
      "epoch": 0.0912,
      "grad_norm": 0.8105843663215637,
      "learning_rate": 9.12e-06,
      "loss": 0.3619,
      "step": 456
    },
    {
      "epoch": 0.0914,
      "grad_norm": 0.5299851298332214,
      "learning_rate": 9.14e-06,
      "loss": 0.3912,
      "step": 457
    },
    {
      "epoch": 0.0916,
      "grad_norm": 0.5225638151168823,
      "learning_rate": 9.16e-06,
      "loss": 0.3082,
      "step": 458
    },
    {
      "epoch": 0.0918,
      "grad_norm": 0.5109646320343018,
      "learning_rate": 9.180000000000002e-06,
      "loss": 0.3584,
      "step": 459
    },
    {
      "epoch": 0.092,
      "grad_norm": 0.47401461005210876,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.3421,
      "step": 460
    },
    {
      "epoch": 0.0922,
      "grad_norm": 0.5220910310745239,
      "learning_rate": 9.220000000000002e-06,
      "loss": 0.3442,
      "step": 461
    },
    {
      "epoch": 0.0924,
      "grad_norm": 0.6377078294754028,
      "learning_rate": 9.240000000000001e-06,
      "loss": 0.3611,
      "step": 462
    },
    {
      "epoch": 0.0926,
      "grad_norm": 0.5247963070869446,
      "learning_rate": 9.260000000000001e-06,
      "loss": 0.357,
      "step": 463
    },
    {
      "epoch": 0.0928,
      "grad_norm": 0.5039740800857544,
      "learning_rate": 9.280000000000001e-06,
      "loss": 0.3425,
      "step": 464
    },
    {
      "epoch": 0.093,
      "grad_norm": 0.9030851125717163,
      "learning_rate": 9.3e-06,
      "loss": 0.3878,
      "step": 465
    },
    {
      "epoch": 0.0932,
      "grad_norm": 0.5864378213882446,
      "learning_rate": 9.32e-06,
      "loss": 0.3699,
      "step": 466
    },
    {
      "epoch": 0.0934,
      "grad_norm": 0.5653566718101501,
      "learning_rate": 9.340000000000002e-06,
      "loss": 0.3688,
      "step": 467
    },
    {
      "epoch": 0.0936,
      "grad_norm": 0.46178269386291504,
      "learning_rate": 9.360000000000002e-06,
      "loss": 0.3689,
      "step": 468
    },
    {
      "epoch": 0.0938,
      "grad_norm": 0.5494042038917542,
      "learning_rate": 9.38e-06,
      "loss": 0.3563,
      "step": 469
    },
    {
      "epoch": 0.094,
      "grad_norm": 0.5124475955963135,
      "learning_rate": 9.4e-06,
      "loss": 0.3726,
      "step": 470
    },
    {
      "epoch": 0.0942,
      "grad_norm": 0.5524993538856506,
      "learning_rate": 9.42e-06,
      "loss": 0.3719,
      "step": 471
    },
    {
      "epoch": 0.0944,
      "grad_norm": 0.5128179788589478,
      "learning_rate": 9.440000000000001e-06,
      "loss": 0.3838,
      "step": 472
    },
    {
      "epoch": 0.0946,
      "grad_norm": 0.8502198457717896,
      "learning_rate": 9.460000000000001e-06,
      "loss": 0.3488,
      "step": 473
    },
    {
      "epoch": 0.0948,
      "grad_norm": 0.40756651759147644,
      "learning_rate": 9.48e-06,
      "loss": 0.3262,
      "step": 474
    },
    {
      "epoch": 0.095,
      "grad_norm": 0.43796393275260925,
      "learning_rate": 9.5e-06,
      "loss": 0.3505,
      "step": 475
    },
    {
      "epoch": 0.0952,
      "grad_norm": 0.49729669094085693,
      "learning_rate": 9.52e-06,
      "loss": 0.3363,
      "step": 476
    },
    {
      "epoch": 0.0954,
      "grad_norm": 0.4638373851776123,
      "learning_rate": 9.54e-06,
      "loss": 0.347,
      "step": 477
    },
    {
      "epoch": 0.0956,
      "grad_norm": 0.5026688575744629,
      "learning_rate": 9.56e-06,
      "loss": 0.3524,
      "step": 478
    },
    {
      "epoch": 0.0958,
      "grad_norm": 0.5149339437484741,
      "learning_rate": 9.58e-06,
      "loss": 0.3927,
      "step": 479
    },
    {
      "epoch": 0.096,
      "grad_norm": 0.5217307209968567,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.3499,
      "step": 480
    },
    {
      "epoch": 0.0962,
      "grad_norm": 0.4903452694416046,
      "learning_rate": 9.620000000000001e-06,
      "loss": 0.3869,
      "step": 481
    },
    {
      "epoch": 0.0964,
      "grad_norm": 0.5208938121795654,
      "learning_rate": 9.640000000000001e-06,
      "loss": 0.3836,
      "step": 482
    },
    {
      "epoch": 0.0966,
      "grad_norm": 0.4571824371814728,
      "learning_rate": 9.66e-06,
      "loss": 0.3722,
      "step": 483
    },
    {
      "epoch": 0.0968,
      "grad_norm": 0.42198646068573,
      "learning_rate": 9.68e-06,
      "loss": 0.3127,
      "step": 484
    },
    {
      "epoch": 0.097,
      "grad_norm": 0.4596642851829529,
      "learning_rate": 9.7e-06,
      "loss": 0.3381,
      "step": 485
    },
    {
      "epoch": 0.0972,
      "grad_norm": 0.5270529389381409,
      "learning_rate": 9.72e-06,
      "loss": 0.3411,
      "step": 486
    },
    {
      "epoch": 0.0974,
      "grad_norm": 0.541283905506134,
      "learning_rate": 9.74e-06,
      "loss": 0.3507,
      "step": 487
    },
    {
      "epoch": 0.0976,
      "grad_norm": 0.7791691422462463,
      "learning_rate": 9.760000000000001e-06,
      "loss": 0.3353,
      "step": 488
    },
    {
      "epoch": 0.0978,
      "grad_norm": 0.40997469425201416,
      "learning_rate": 9.780000000000001e-06,
      "loss": 0.3335,
      "step": 489
    },
    {
      "epoch": 0.098,
      "grad_norm": 0.43703579902648926,
      "learning_rate": 9.800000000000001e-06,
      "loss": 0.3352,
      "step": 490
    },
    {
      "epoch": 0.0982,
      "grad_norm": 0.6034693121910095,
      "learning_rate": 9.820000000000001e-06,
      "loss": 0.3588,
      "step": 491
    },
    {
      "epoch": 0.0984,
      "grad_norm": 0.5573011040687561,
      "learning_rate": 9.84e-06,
      "loss": 0.3712,
      "step": 492
    },
    {
      "epoch": 0.0986,
      "grad_norm": 0.4055820405483246,
      "learning_rate": 9.86e-06,
      "loss": 0.3213,
      "step": 493
    },
    {
      "epoch": 0.0988,
      "grad_norm": 0.46741724014282227,
      "learning_rate": 9.88e-06,
      "loss": 0.3875,
      "step": 494
    },
    {
      "epoch": 0.099,
      "grad_norm": 0.5571779012680054,
      "learning_rate": 9.9e-06,
      "loss": 0.3856,
      "step": 495
    },
    {
      "epoch": 0.0992,
      "grad_norm": 0.7290095090866089,
      "learning_rate": 9.920000000000002e-06,
      "loss": 0.4029,
      "step": 496
    },
    {
      "epoch": 0.0994,
      "grad_norm": 0.5976380109786987,
      "learning_rate": 9.940000000000001e-06,
      "loss": 0.4179,
      "step": 497
    },
    {
      "epoch": 0.0996,
      "grad_norm": 0.5205994844436646,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.3603,
      "step": 498
    },
    {
      "epoch": 0.0998,
      "grad_norm": 0.5301013588905334,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.3593,
      "step": 499
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.49482277035713196,
      "learning_rate": 1e-05,
      "loss": 0.3167,
      "step": 500
    },
    {
      "epoch": 0.0002,
      "grad_norm": 0.9537463188171387,
      "learning_rate": 9.999998781530372e-06,
      "loss": 0.3416,
      "step": 501
    },
    {
      "epoch": 0.0004,
      "grad_norm": 0.5842031836509705,
      "learning_rate": 9.999995126122076e-06,
      "loss": 0.3749,
      "step": 502
    },
    {
      "epoch": 0.0006,
      "grad_norm": 0.4697686433792114,
      "learning_rate": 9.999989033776898e-06,
      "loss": 0.339,
      "step": 503
    },
    {
      "epoch": 0.0008,
      "grad_norm": 0.5015813112258911,
      "learning_rate": 9.999980504497803e-06,
      "loss": 0.3457,
      "step": 504
    },
    {
      "epoch": 0.001,
      "grad_norm": 0.46093735098838806,
      "learning_rate": 9.999969538288953e-06,
      "loss": 0.3554,
      "step": 505
    },
    {
      "epoch": 0.0012,
      "grad_norm": 0.6181058287620544,
      "learning_rate": 9.999956135155688e-06,
      "loss": 0.3769,
      "step": 506
    },
    {
      "epoch": 0.0014,
      "grad_norm": 0.5111052989959717,
      "learning_rate": 9.999940295104546e-06,
      "loss": 0.3501,
      "step": 507
    },
    {
      "epoch": 0.0016,
      "grad_norm": 0.4930100739002228,
      "learning_rate": 9.999922018143242e-06,
      "loss": 0.3728,
      "step": 508
    },
    {
      "epoch": 0.0018,
      "grad_norm": 0.5063087344169617,
      "learning_rate": 9.999901304280686e-06,
      "loss": 0.361,
      "step": 509
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.48183825612068176,
      "learning_rate": 9.999878153526974e-06,
      "loss": 0.3731,
      "step": 510
    },
    {
      "epoch": 0.0022,
      "grad_norm": 0.563999354839325,
      "learning_rate": 9.99985256589339e-06,
      "loss": 0.3716,
      "step": 511
    },
    {
      "epoch": 0.0024,
      "grad_norm": 0.5149611830711365,
      "learning_rate": 9.999824541392404e-06,
      "loss": 0.3584,
      "step": 512
    },
    {
      "epoch": 0.0026,
      "grad_norm": 0.5036495327949524,
      "learning_rate": 9.999794080037675e-06,
      "loss": 0.3614,
      "step": 513
    },
    {
      "epoch": 0.0028,
      "grad_norm": 0.5326202511787415,
      "learning_rate": 9.99976118184405e-06,
      "loss": 0.3637,
      "step": 514
    },
    {
      "epoch": 0.003,
      "grad_norm": 0.7005812525749207,
      "learning_rate": 9.999725846827562e-06,
      "loss": 0.357,
      "step": 515
    },
    {
      "epoch": 0.0032,
      "grad_norm": 0.465455025434494,
      "learning_rate": 9.999688075005434e-06,
      "loss": 0.349,
      "step": 516
    },
    {
      "epoch": 0.0034,
      "grad_norm": 0.5833321213722229,
      "learning_rate": 9.999647866396073e-06,
      "loss": 0.3792,
      "step": 517
    },
    {
      "epoch": 0.0036,
      "grad_norm": 0.6884943246841431,
      "learning_rate": 9.999605221019082e-06,
      "loss": 0.3561,
      "step": 518
    },
    {
      "epoch": 0.0038,
      "grad_norm": 1.5258033275604248,
      "learning_rate": 9.999560138895238e-06,
      "loss": 0.3823,
      "step": 519
    },
    {
      "epoch": 0.004,
      "grad_norm": 0.5705844759941101,
      "learning_rate": 9.999512620046523e-06,
      "loss": 0.3703,
      "step": 520
    },
    {
      "epoch": 0.0042,
      "grad_norm": 0.46794411540031433,
      "learning_rate": 9.999462664496088e-06,
      "loss": 0.3611,
      "step": 521
    },
    {
      "epoch": 0.0044,
      "grad_norm": 0.5698813796043396,
      "learning_rate": 9.999410272268285e-06,
      "loss": 0.3458,
      "step": 522
    },
    {
      "epoch": 0.0046,
      "grad_norm": 0.5110376477241516,
      "learning_rate": 9.999355443388649e-06,
      "loss": 0.3882,
      "step": 523
    },
    {
      "epoch": 0.0048,
      "grad_norm": 0.5603966116905212,
      "learning_rate": 9.999298177883902e-06,
      "loss": 0.3721,
      "step": 524
    },
    {
      "epoch": 0.005,
      "grad_norm": 0.44076311588287354,
      "learning_rate": 9.999238475781957e-06,
      "loss": 0.3596,
      "step": 525
    },
    {
      "epoch": 0.0052,
      "grad_norm": 0.6495744585990906,
      "learning_rate": 9.999176337111908e-06,
      "loss": 0.3522,
      "step": 526
    },
    {
      "epoch": 0.0054,
      "grad_norm": 0.5070968866348267,
      "learning_rate": 9.999111761904046e-06,
      "loss": 0.3655,
      "step": 527
    },
    {
      "epoch": 0.0056,
      "grad_norm": 0.5735330581665039,
      "learning_rate": 9.99904475018984e-06,
      "loss": 0.3716,
      "step": 528
    },
    {
      "epoch": 0.0058,
      "grad_norm": 0.6356480717658997,
      "learning_rate": 9.99897530200195e-06,
      "loss": 0.3498,
      "step": 529
    },
    {
      "epoch": 0.006,
      "grad_norm": 0.46675553917884827,
      "learning_rate": 9.998903417374228e-06,
      "loss": 0.3429,
      "step": 530
    },
    {
      "epoch": 0.0062,
      "grad_norm": 0.5272665023803711,
      "learning_rate": 9.998829096341706e-06,
      "loss": 0.3543,
      "step": 531
    },
    {
      "epoch": 0.0064,
      "grad_norm": 0.4716573655605316,
      "learning_rate": 9.998752338940612e-06,
      "loss": 0.3383,
      "step": 532
    },
    {
      "epoch": 0.0066,
      "grad_norm": 0.574217677116394,
      "learning_rate": 9.998673145208351e-06,
      "loss": 0.3368,
      "step": 533
    },
    {
      "epoch": 0.0068,
      "grad_norm": 0.5093129873275757,
      "learning_rate": 9.998591515183524e-06,
      "loss": 0.3821,
      "step": 534
    },
    {
      "epoch": 0.007,
      "grad_norm": 0.49972254037857056,
      "learning_rate": 9.998507448905917e-06,
      "loss": 0.385,
      "step": 535
    },
    {
      "epoch": 0.0072,
      "grad_norm": 0.5705024600028992,
      "learning_rate": 9.9984209464165e-06,
      "loss": 0.377,
      "step": 536
    },
    {
      "epoch": 0.0074,
      "grad_norm": 0.49048206210136414,
      "learning_rate": 9.998332007757436e-06,
      "loss": 0.3523,
      "step": 537
    },
    {
      "epoch": 0.0076,
      "grad_norm": 0.5960965156555176,
      "learning_rate": 9.998240632972073e-06,
      "loss": 0.3667,
      "step": 538
    },
    {
      "epoch": 0.0078,
      "grad_norm": 0.5229523181915283,
      "learning_rate": 9.998146822104943e-06,
      "loss": 0.3215,
      "step": 539
    },
    {
      "epoch": 0.008,
      "grad_norm": 0.48719435930252075,
      "learning_rate": 9.998050575201772e-06,
      "loss": 0.3526,
      "step": 540
    },
    {
      "epoch": 0.0082,
      "grad_norm": 0.481819748878479,
      "learning_rate": 9.997951892309468e-06,
      "loss": 0.3728,
      "step": 541
    },
    {
      "epoch": 0.0084,
      "grad_norm": 0.5429572463035583,
      "learning_rate": 9.997850773476126e-06,
      "loss": 0.3737,
      "step": 542
    },
    {
      "epoch": 0.0086,
      "grad_norm": 0.6860358715057373,
      "learning_rate": 9.997747218751032e-06,
      "loss": 0.3539,
      "step": 543
    },
    {
      "epoch": 0.0088,
      "grad_norm": 0.4458191394805908,
      "learning_rate": 9.997641228184656e-06,
      "loss": 0.3619,
      "step": 544
    },
    {
      "epoch": 0.009,
      "grad_norm": 0.5856768488883972,
      "learning_rate": 9.997532801828659e-06,
      "loss": 0.3558,
      "step": 545
    },
    {
      "epoch": 0.0092,
      "grad_norm": 0.8655544519424438,
      "learning_rate": 9.997421939735885e-06,
      "loss": 0.366,
      "step": 546
    },
    {
      "epoch": 0.0094,
      "grad_norm": 0.5159733891487122,
      "learning_rate": 9.997308641960365e-06,
      "loss": 0.3427,
      "step": 547
    },
    {
      "epoch": 0.0096,
      "grad_norm": 1.1214336156845093,
      "learning_rate": 9.997192908557322e-06,
      "loss": 0.3719,
      "step": 548
    },
    {
      "epoch": 0.0098,
      "grad_norm": 0.4956130385398865,
      "learning_rate": 9.997074739583162e-06,
      "loss": 0.3436,
      "step": 549
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5146604180335999,
      "learning_rate": 9.99695413509548e-06,
      "loss": 0.374,
      "step": 550
    },
    {
      "epoch": 0.0102,
      "grad_norm": 0.49554336071014404,
      "learning_rate": 9.996831095153054e-06,
      "loss": 0.3566,
      "step": 551
    },
    {
      "epoch": 0.0104,
      "grad_norm": 0.5481516122817993,
      "learning_rate": 9.996705619815857e-06,
      "loss": 0.3944,
      "step": 552
    },
    {
      "epoch": 0.0106,
      "grad_norm": 0.5984386205673218,
      "learning_rate": 9.99657770914504e-06,
      "loss": 0.3878,
      "step": 553
    },
    {
      "epoch": 0.0108,
      "grad_norm": 0.825145959854126,
      "learning_rate": 9.996447363202947e-06,
      "loss": 0.3818,
      "step": 554
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.5802706480026245,
      "learning_rate": 9.996314582053106e-06,
      "loss": 0.3369,
      "step": 555
    },
    {
      "epoch": 0.0112,
      "grad_norm": 0.5119512677192688,
      "learning_rate": 9.996179365760235e-06,
      "loss": 0.3765,
      "step": 556
    },
    {
      "epoch": 0.0114,
      "grad_norm": 0.47281232476234436,
      "learning_rate": 9.996041714390235e-06,
      "loss": 0.3952,
      "step": 557
    },
    {
      "epoch": 0.0116,
      "grad_norm": 0.44386565685272217,
      "learning_rate": 9.995901628010196e-06,
      "loss": 0.3475,
      "step": 558
    },
    {
      "epoch": 0.0118,
      "grad_norm": 0.5245848894119263,
      "learning_rate": 9.995759106688394e-06,
      "loss": 0.3421,
      "step": 559
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.44147640466690063,
      "learning_rate": 9.995614150494293e-06,
      "loss": 0.3664,
      "step": 560
    },
    {
      "epoch": 0.0122,
      "grad_norm": 0.4493354558944702,
      "learning_rate": 9.995466759498543e-06,
      "loss": 0.3504,
      "step": 561
    },
    {
      "epoch": 0.0124,
      "grad_norm": 0.5113531947135925,
      "learning_rate": 9.995316933772978e-06,
      "loss": 0.3814,
      "step": 562
    },
    {
      "epoch": 0.0126,
      "grad_norm": 0.4721671938896179,
      "learning_rate": 9.995164673390624e-06,
      "loss": 0.3282,
      "step": 563
    },
    {
      "epoch": 0.0128,
      "grad_norm": 0.5598345398902893,
      "learning_rate": 9.995009978425692e-06,
      "loss": 0.3632,
      "step": 564
    },
    {
      "epoch": 0.013,
      "grad_norm": 0.7488293051719666,
      "learning_rate": 9.994852848953574e-06,
      "loss": 0.3362,
      "step": 565
    },
    {
      "epoch": 0.0132,
      "grad_norm": 0.7085559964179993,
      "learning_rate": 9.994693285050858e-06,
      "loss": 0.3487,
      "step": 566
    },
    {
      "epoch": 0.0134,
      "grad_norm": 0.574830949306488,
      "learning_rate": 9.994531286795309e-06,
      "loss": 0.3525,
      "step": 567
    },
    {
      "epoch": 0.0136,
      "grad_norm": 0.6955744624137878,
      "learning_rate": 9.994366854265886e-06,
      "loss": 0.424,
      "step": 568
    },
    {
      "epoch": 0.0138,
      "grad_norm": 0.5498460531234741,
      "learning_rate": 9.99419998754273e-06,
      "loss": 0.3628,
      "step": 569
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.5392717719078064,
      "learning_rate": 9.994030686707171e-06,
      "loss": 0.3497,
      "step": 570
    },
    {
      "epoch": 0.0142,
      "grad_norm": 0.5130705237388611,
      "learning_rate": 9.993858951841724e-06,
      "loss": 0.3678,
      "step": 571
    },
    {
      "epoch": 0.0144,
      "grad_norm": 0.520224392414093,
      "learning_rate": 9.99368478303009e-06,
      "loss": 0.3546,
      "step": 572
    },
    {
      "epoch": 0.0146,
      "grad_norm": 0.5771729946136475,
      "learning_rate": 9.993508180357154e-06,
      "loss": 0.3542,
      "step": 573
    },
    {
      "epoch": 0.0148,
      "grad_norm": 0.5811485052108765,
      "learning_rate": 9.993329143908994e-06,
      "loss": 0.3588,
      "step": 574
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.575252115726471,
      "learning_rate": 9.993147673772869e-06,
      "loss": 0.3437,
      "step": 575
    },
    {
      "epoch": 0.0152,
      "grad_norm": 0.44212019443511963,
      "learning_rate": 9.992963770037227e-06,
      "loss": 0.3349,
      "step": 576
    },
    {
      "epoch": 0.0154,
      "grad_norm": 0.4850029945373535,
      "learning_rate": 9.992777432791697e-06,
      "loss": 0.365,
      "step": 577
    },
    {
      "epoch": 0.0156,
      "grad_norm": 0.6545392870903015,
      "learning_rate": 9.9925886621271e-06,
      "loss": 0.3564,
      "step": 578
    },
    {
      "epoch": 0.0158,
      "grad_norm": 0.5586860775947571,
      "learning_rate": 9.992397458135438e-06,
      "loss": 0.3412,
      "step": 579
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.4717925190925598,
      "learning_rate": 9.992203820909906e-06,
      "loss": 0.3589,
      "step": 580
    },
    {
      "epoch": 0.0162,
      "grad_norm": 1.3972848653793335,
      "learning_rate": 9.992007750544876e-06,
      "loss": 0.3714,
      "step": 581
    },
    {
      "epoch": 0.0164,
      "grad_norm": 0.5553995966911316,
      "learning_rate": 9.991809247135912e-06,
      "loss": 0.361,
      "step": 582
    },
    {
      "epoch": 0.0166,
      "grad_norm": 0.942405641078949,
      "learning_rate": 9.991608310779762e-06,
      "loss": 0.3549,
      "step": 583
    },
    {
      "epoch": 0.0168,
      "grad_norm": 0.5411939024925232,
      "learning_rate": 9.99140494157436e-06,
      "loss": 0.4034,
      "step": 584
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.7739619612693787,
      "learning_rate": 9.991199139618828e-06,
      "loss": 0.3709,
      "step": 585
    },
    {
      "epoch": 0.0172,
      "grad_norm": 0.6578670144081116,
      "learning_rate": 9.990990905013466e-06,
      "loss": 0.3702,
      "step": 586
    },
    {
      "epoch": 0.0174,
      "grad_norm": 0.5493494272232056,
      "learning_rate": 9.99078023785977e-06,
      "loss": 0.3524,
      "step": 587
    },
    {
      "epoch": 0.0176,
      "grad_norm": 0.6822598576545715,
      "learning_rate": 9.990567138260414e-06,
      "loss": 0.3364,
      "step": 588
    },
    {
      "epoch": 0.0178,
      "grad_norm": 0.5882232785224915,
      "learning_rate": 9.990351606319261e-06,
      "loss": 0.3905,
      "step": 589
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.5470965504646301,
      "learning_rate": 9.990133642141359e-06,
      "loss": 0.3599,
      "step": 590
    },
    {
      "epoch": 0.0182,
      "grad_norm": 0.5688984990119934,
      "learning_rate": 9.98991324583294e-06,
      "loss": 0.3769,
      "step": 591
    },
    {
      "epoch": 0.0184,
      "grad_norm": 0.46216684579849243,
      "learning_rate": 9.989690417501423e-06,
      "loss": 0.3426,
      "step": 592
    },
    {
      "epoch": 0.0186,
      "grad_norm": 1.4114888906478882,
      "learning_rate": 9.989465157255413e-06,
      "loss": 0.3642,
      "step": 593
    },
    {
      "epoch": 0.0188,
      "grad_norm": 0.5993971824645996,
      "learning_rate": 9.989237465204698e-06,
      "loss": 0.3643,
      "step": 594
    },
    {
      "epoch": 0.019,
      "grad_norm": 0.44965410232543945,
      "learning_rate": 9.989007341460251e-06,
      "loss": 0.314,
      "step": 595
    },
    {
      "epoch": 0.0192,
      "grad_norm": 0.44512510299682617,
      "learning_rate": 9.988774786134235e-06,
      "loss": 0.3493,
      "step": 596
    },
    {
      "epoch": 0.0194,
      "grad_norm": 0.5825225710868835,
      "learning_rate": 9.988539799339989e-06,
      "loss": 0.3536,
      "step": 597
    },
    {
      "epoch": 0.0196,
      "grad_norm": 0.6169155240058899,
      "learning_rate": 9.98830238119205e-06,
      "loss": 0.3609,
      "step": 598
    },
    {
      "epoch": 0.0198,
      "grad_norm": 0.7028898596763611,
      "learning_rate": 9.988062531806127e-06,
      "loss": 0.3193,
      "step": 599
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5416840314865112,
      "learning_rate": 9.987820251299121e-06,
      "loss": 0.3885,
      "step": 600
    },
    {
      "epoch": 0.0002,
      "grad_norm": 0.6813318133354187,
      "learning_rate": 9.987575539789119e-06,
      "loss": 0.3631,
      "step": 601
    },
    {
      "epoch": 0.0004,
      "grad_norm": 1.171585202217102,
      "learning_rate": 9.987328397395389e-06,
      "loss": 0.3627,
      "step": 602
    },
    {
      "epoch": 0.0006,
      "grad_norm": 0.7335216999053955,
      "learning_rate": 9.987078824238384e-06,
      "loss": 0.3614,
      "step": 603
    },
    {
      "epoch": 0.0008,
      "grad_norm": 0.484480082988739,
      "learning_rate": 9.986826820439743e-06,
      "loss": 0.3257,
      "step": 604
    },
    {
      "epoch": 0.001,
      "grad_norm": 0.49516794085502625,
      "learning_rate": 9.98657238612229e-06,
      "loss": 0.3508,
      "step": 605
    },
    {
      "epoch": 0.0012,
      "grad_norm": 0.5053285956382751,
      "learning_rate": 9.986315521410035e-06,
      "loss": 0.3428,
      "step": 606
    },
    {
      "epoch": 0.0014,
      "grad_norm": 0.5630417466163635,
      "learning_rate": 9.98605622642817e-06,
      "loss": 0.3487,
      "step": 607
    },
    {
      "epoch": 0.0016,
      "grad_norm": 0.44449278712272644,
      "learning_rate": 9.98579450130307e-06,
      "loss": 0.3423,
      "step": 608
    },
    {
      "epoch": 0.0018,
      "grad_norm": 0.9341311454772949,
      "learning_rate": 9.9855303461623e-06,
      "loss": 0.3762,
      "step": 609
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.5018416047096252,
      "learning_rate": 9.985263761134602e-06,
      "loss": 0.3858,
      "step": 610
    },
    {
      "epoch": 0.0022,
      "grad_norm": 0.5571470260620117,
      "learning_rate": 9.98499474634991e-06,
      "loss": 0.3424,
      "step": 611
    },
    {
      "epoch": 0.0024,
      "grad_norm": 0.5043375492095947,
      "learning_rate": 9.984723301939337e-06,
      "loss": 0.3302,
      "step": 612
    },
    {
      "epoch": 0.0026,
      "grad_norm": 0.6455717086791992,
      "learning_rate": 9.98444942803518e-06,
      "loss": 0.3718,
      "step": 613
    },
    {
      "epoch": 0.0028,
      "grad_norm": 0.508201003074646,
      "learning_rate": 9.984173124770924e-06,
      "loss": 0.3301,
      "step": 614
    },
    {
      "epoch": 0.003,
      "grad_norm": 0.7399879097938538,
      "learning_rate": 9.983894392281237e-06,
      "loss": 0.3758,
      "step": 615
    },
    {
      "epoch": 0.0032,
      "grad_norm": 0.4698946177959442,
      "learning_rate": 9.983613230701967e-06,
      "loss": 0.3341,
      "step": 616
    },
    {
      "epoch": 0.0034,
      "grad_norm": 0.7075951099395752,
      "learning_rate": 9.98332964017015e-06,
      "loss": 0.3296,
      "step": 617
    },
    {
      "epoch": 0.0036,
      "grad_norm": 0.5513788461685181,
      "learning_rate": 9.983043620824005e-06,
      "loss": 0.3869,
      "step": 618
    },
    {
      "epoch": 0.0038,
      "grad_norm": 0.5875303149223328,
      "learning_rate": 9.982755172802933e-06,
      "loss": 0.3276,
      "step": 619
    },
    {
      "epoch": 0.004,
      "grad_norm": 0.46472278237342834,
      "learning_rate": 9.982464296247523e-06,
      "loss": 0.3456,
      "step": 620
    },
    {
      "epoch": 0.0042,
      "grad_norm": 0.7273411154747009,
      "learning_rate": 9.98217099129954e-06,
      "loss": 0.3593,
      "step": 621
    },
    {
      "epoch": 0.0044,
      "grad_norm": 1.2494257688522339,
      "learning_rate": 9.981875258101944e-06,
      "loss": 0.3864,
      "step": 622
    },
    {
      "epoch": 0.0046,
      "grad_norm": 0.555046796798706,
      "learning_rate": 9.981577096798864e-06,
      "loss": 0.3597,
      "step": 623
    },
    {
      "epoch": 0.0048,
      "grad_norm": 0.6468971967697144,
      "learning_rate": 9.981276507535625e-06,
      "loss": 0.3776,
      "step": 624
    },
    {
      "epoch": 0.005,
      "grad_norm": 0.6026402115821838,
      "learning_rate": 9.980973490458728e-06,
      "loss": 0.3652,
      "step": 625
    },
    {
      "epoch": 0.0052,
      "grad_norm": 0.4538074731826782,
      "learning_rate": 9.980668045715864e-06,
      "loss": 0.37,
      "step": 626
    },
    {
      "epoch": 0.0054,
      "grad_norm": 0.6768639087677002,
      "learning_rate": 9.980360173455899e-06,
      "loss": 0.411,
      "step": 627
    },
    {
      "epoch": 0.0056,
      "grad_norm": 0.4825188219547272,
      "learning_rate": 9.980049873828887e-06,
      "loss": 0.3277,
      "step": 628
    },
    {
      "epoch": 0.0058,
      "grad_norm": 0.7685976028442383,
      "learning_rate": 9.979737146986064e-06,
      "loss": 0.3043,
      "step": 629
    },
    {
      "epoch": 0.006,
      "grad_norm": 0.6729751825332642,
      "learning_rate": 9.979421993079853e-06,
      "loss": 0.3572,
      "step": 630
    },
    {
      "epoch": 0.0062,
      "grad_norm": 0.6244285106658936,
      "learning_rate": 9.979104412263851e-06,
      "loss": 0.3656,
      "step": 631
    },
    {
      "epoch": 0.0064,
      "grad_norm": 0.531461775302887,
      "learning_rate": 9.978784404692847e-06,
      "loss": 0.3349,
      "step": 632
    },
    {
      "epoch": 0.0066,
      "grad_norm": 0.5611459612846375,
      "learning_rate": 9.978461970522807e-06,
      "loss": 0.3799,
      "step": 633
    },
    {
      "epoch": 0.0068,
      "grad_norm": 0.4558219313621521,
      "learning_rate": 9.97813710991088e-06,
      "loss": 0.3531,
      "step": 634
    },
    {
      "epoch": 0.007,
      "grad_norm": 0.578061044216156,
      "learning_rate": 9.9778098230154e-06,
      "loss": 0.3405,
      "step": 635
    },
    {
      "epoch": 0.0072,
      "grad_norm": 0.5462324023246765,
      "learning_rate": 9.977480109995886e-06,
      "loss": 0.3309,
      "step": 636
    },
    {
      "epoch": 0.0074,
      "grad_norm": 0.6123208999633789,
      "learning_rate": 9.977147971013033e-06,
      "loss": 0.3717,
      "step": 637
    },
    {
      "epoch": 0.0076,
      "grad_norm": 0.7167513370513916,
      "learning_rate": 9.97681340622872e-06,
      "loss": 0.3665,
      "step": 638
    },
    {
      "epoch": 0.0078,
      "grad_norm": 0.7791906595230103,
      "learning_rate": 9.976476415806013e-06,
      "loss": 0.3903,
      "step": 639
    },
    {
      "epoch": 0.008,
      "grad_norm": 0.6967666745185852,
      "learning_rate": 9.976136999909156e-06,
      "loss": 0.3438,
      "step": 640
    },
    {
      "epoch": 0.0082,
      "grad_norm": 0.4898976981639862,
      "learning_rate": 9.975795158703576e-06,
      "loss": 0.3607,
      "step": 641
    },
    {
      "epoch": 0.0084,
      "grad_norm": 0.6834308505058289,
      "learning_rate": 9.975450892355882e-06,
      "loss": 0.3742,
      "step": 642
    },
    {
      "epoch": 0.0086,
      "grad_norm": 0.4446169435977936,
      "learning_rate": 9.975104201033868e-06,
      "loss": 0.3245,
      "step": 643
    },
    {
      "epoch": 0.0088,
      "grad_norm": 0.6030970811843872,
      "learning_rate": 9.974755084906503e-06,
      "loss": 0.3448,
      "step": 644
    },
    {
      "epoch": 0.009,
      "grad_norm": 0.5022148489952087,
      "learning_rate": 9.974403544143942e-06,
      "loss": 0.3013,
      "step": 645
    },
    {
      "epoch": 0.0092,
      "grad_norm": 0.7542896866798401,
      "learning_rate": 9.974049578917524e-06,
      "loss": 0.3633,
      "step": 646
    },
    {
      "epoch": 0.0094,
      "grad_norm": 0.46456819772720337,
      "learning_rate": 9.973693189399767e-06,
      "loss": 0.3394,
      "step": 647
    },
    {
      "epoch": 0.0096,
      "grad_norm": 0.6486778259277344,
      "learning_rate": 9.973334375764372e-06,
      "loss": 0.3384,
      "step": 648
    },
    {
      "epoch": 0.0098,
      "grad_norm": 0.4601091742515564,
      "learning_rate": 9.972973138186217e-06,
      "loss": 0.3298,
      "step": 649
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6157973408699036,
      "learning_rate": 9.972609476841368e-06,
      "loss": 0.3551,
      "step": 650
    },
    {
      "epoch": 0.0102,
      "grad_norm": 0.47196832299232483,
      "learning_rate": 9.972243391907068e-06,
      "loss": 0.3478,
      "step": 651
    },
    {
      "epoch": 0.0104,
      "grad_norm": 0.48891618847846985,
      "learning_rate": 9.97187488356174e-06,
      "loss": 0.338,
      "step": 652
    },
    {
      "epoch": 0.0106,
      "grad_norm": 0.5402987599372864,
      "learning_rate": 9.971503951984996e-06,
      "loss": 0.3692,
      "step": 653
    },
    {
      "epoch": 0.0108,
      "grad_norm": 0.5400533676147461,
      "learning_rate": 9.971130597357618e-06,
      "loss": 0.3803,
      "step": 654
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.5055856108665466,
      "learning_rate": 9.970754819861577e-06,
      "loss": 0.3695,
      "step": 655
    },
    {
      "epoch": 0.0112,
      "grad_norm": 0.5789377093315125,
      "learning_rate": 9.970376619680024e-06,
      "loss": 0.3872,
      "step": 656
    },
    {
      "epoch": 0.0114,
      "grad_norm": 0.6448431015014648,
      "learning_rate": 9.969995996997285e-06,
      "loss": 0.3761,
      "step": 657
    },
    {
      "epoch": 0.0116,
      "grad_norm": 0.559043824672699,
      "learning_rate": 9.969612951998874e-06,
      "loss": 0.3753,
      "step": 658
    },
    {
      "epoch": 0.0118,
      "grad_norm": 0.5242133736610413,
      "learning_rate": 9.969227484871485e-06,
      "loss": 0.3665,
      "step": 659
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.5730127692222595,
      "learning_rate": 9.968839595802982e-06,
      "loss": 0.3452,
      "step": 660
    },
    {
      "epoch": 0.0122,
      "grad_norm": 0.6680643558502197,
      "learning_rate": 9.968449284982424e-06,
      "loss": 0.3679,
      "step": 661
    },
    {
      "epoch": 0.0124,
      "grad_norm": 0.43491604924201965,
      "learning_rate": 9.968056552600043e-06,
      "loss": 0.3541,
      "step": 662
    },
    {
      "epoch": 0.0126,
      "grad_norm": 0.49970847368240356,
      "learning_rate": 9.96766139884725e-06,
      "loss": 0.3811,
      "step": 663
    },
    {
      "epoch": 0.0128,
      "grad_norm": 0.535399317741394,
      "learning_rate": 9.967263823916638e-06,
      "loss": 0.3862,
      "step": 664
    },
    {
      "epoch": 0.013,
      "grad_norm": 0.48987600207328796,
      "learning_rate": 9.966863828001982e-06,
      "loss": 0.3179,
      "step": 665
    },
    {
      "epoch": 0.0132,
      "grad_norm": 0.5325382947921753,
      "learning_rate": 9.966461411298235e-06,
      "loss": 0.3556,
      "step": 666
    },
    {
      "epoch": 0.0134,
      "grad_norm": 0.5705052614212036,
      "learning_rate": 9.966056574001528e-06,
      "loss": 0.3636,
      "step": 667
    },
    {
      "epoch": 0.0136,
      "grad_norm": 0.4482817053794861,
      "learning_rate": 9.965649316309178e-06,
      "loss": 0.3452,
      "step": 668
    },
    {
      "epoch": 0.0138,
      "grad_norm": 0.5257667899131775,
      "learning_rate": 9.965239638419673e-06,
      "loss": 0.3846,
      "step": 669
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.6103948950767517,
      "learning_rate": 9.964827540532685e-06,
      "loss": 0.3375,
      "step": 670
    },
    {
      "epoch": 0.0142,
      "grad_norm": 0.43965429067611694,
      "learning_rate": 9.964413022849069e-06,
      "loss": 0.3441,
      "step": 671
    },
    {
      "epoch": 0.0144,
      "grad_norm": 0.6877450346946716,
      "learning_rate": 9.963996085570854e-06,
      "loss": 0.3502,
      "step": 672
    },
    {
      "epoch": 0.0146,
      "grad_norm": 0.4782331585884094,
      "learning_rate": 9.96357672890125e-06,
      "loss": 0.3462,
      "step": 673
    },
    {
      "epoch": 0.0148,
      "grad_norm": 0.5345056056976318,
      "learning_rate": 9.963154953044646e-06,
      "loss": 0.3736,
      "step": 674
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.5718342065811157,
      "learning_rate": 9.962730758206612e-06,
      "loss": 0.3454,
      "step": 675
    },
    {
      "epoch": 0.0152,
      "grad_norm": 0.6207794547080994,
      "learning_rate": 9.962304144593893e-06,
      "loss": 0.369,
      "step": 676
    },
    {
      "epoch": 0.0154,
      "grad_norm": 0.4944224953651428,
      "learning_rate": 9.961875112414417e-06,
      "loss": 0.3632,
      "step": 677
    },
    {
      "epoch": 0.0156,
      "grad_norm": 0.4984436333179474,
      "learning_rate": 9.96144366187729e-06,
      "loss": 0.3506,
      "step": 678
    },
    {
      "epoch": 0.0158,
      "grad_norm": 0.5809533596038818,
      "learning_rate": 9.961009793192793e-06,
      "loss": 0.3456,
      "step": 679
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.5047887563705444,
      "learning_rate": 9.960573506572391e-06,
      "loss": 0.3701,
      "step": 680
    },
    {
      "epoch": 0.0162,
      "grad_norm": 0.647260308265686,
      "learning_rate": 9.960134802228722e-06,
      "loss": 0.3665,
      "step": 681
    },
    {
      "epoch": 0.0164,
      "grad_norm": 0.6471899747848511,
      "learning_rate": 9.959693680375608e-06,
      "loss": 0.4181,
      "step": 682
    },
    {
      "epoch": 0.0166,
      "grad_norm": 0.6752645969390869,
      "learning_rate": 9.959250141228046e-06,
      "loss": 0.3837,
      "step": 683
    },
    {
      "epoch": 0.0168,
      "grad_norm": 0.4914401173591614,
      "learning_rate": 9.958804185002209e-06,
      "loss": 0.3479,
      "step": 684
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.5045030117034912,
      "learning_rate": 9.958355811915452e-06,
      "loss": 0.3486,
      "step": 685
    },
    {
      "epoch": 0.0172,
      "grad_norm": 0.5321974754333496,
      "learning_rate": 9.957905022186309e-06,
      "loss": 0.3408,
      "step": 686
    },
    {
      "epoch": 0.0174,
      "grad_norm": 0.5780106782913208,
      "learning_rate": 9.957451816034487e-06,
      "loss": 0.3569,
      "step": 687
    },
    {
      "epoch": 0.0176,
      "grad_norm": 0.5012310147285461,
      "learning_rate": 9.956996193680874e-06,
      "loss": 0.3931,
      "step": 688
    },
    {
      "epoch": 0.0178,
      "grad_norm": 0.5640946626663208,
      "learning_rate": 9.956538155347534e-06,
      "loss": 0.3386,
      "step": 689
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.5509325861930847,
      "learning_rate": 9.95607770125771e-06,
      "loss": 0.3642,
      "step": 690
    },
    {
      "epoch": 0.0182,
      "grad_norm": 0.5786334872245789,
      "learning_rate": 9.95561483163582e-06,
      "loss": 0.4091,
      "step": 691
    },
    {
      "epoch": 0.0184,
      "grad_norm": 1.0357894897460938,
      "learning_rate": 9.955149546707465e-06,
      "loss": 0.4022,
      "step": 692
    },
    {
      "epoch": 0.0186,
      "grad_norm": 0.5953634977340698,
      "learning_rate": 9.954681846699414e-06,
      "loss": 0.3813,
      "step": 693
    },
    {
      "epoch": 0.0188,
      "grad_norm": 0.4782743453979492,
      "learning_rate": 9.954211731839623e-06,
      "loss": 0.3537,
      "step": 694
    },
    {
      "epoch": 0.019,
      "grad_norm": 0.527264416217804,
      "learning_rate": 9.953739202357219e-06,
      "loss": 0.3447,
      "step": 695
    },
    {
      "epoch": 0.0192,
      "grad_norm": 0.5654644966125488,
      "learning_rate": 9.953264258482505e-06,
      "loss": 0.3747,
      "step": 696
    },
    {
      "epoch": 0.0194,
      "grad_norm": 0.5423824787139893,
      "learning_rate": 9.952786900446964e-06,
      "loss": 0.3657,
      "step": 697
    },
    {
      "epoch": 0.0196,
      "grad_norm": 0.5788365006446838,
      "learning_rate": 9.952307128483257e-06,
      "loss": 0.3443,
      "step": 698
    },
    {
      "epoch": 0.0198,
      "grad_norm": 0.5104005932807922,
      "learning_rate": 9.951824942825215e-06,
      "loss": 0.3124,
      "step": 699
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.549763023853302,
      "learning_rate": 9.951340343707852e-06,
      "loss": 0.35,
      "step": 700
    },
    {
      "epoch": 0.0202,
      "grad_norm": 0.42607367038726807,
      "learning_rate": 9.950853331367356e-06,
      "loss": 0.3265,
      "step": 701
    },
    {
      "epoch": 0.0204,
      "grad_norm": 0.6021228432655334,
      "learning_rate": 9.950363906041089e-06,
      "loss": 0.3519,
      "step": 702
    },
    {
      "epoch": 0.0206,
      "grad_norm": 0.6586760878562927,
      "learning_rate": 9.94987206796759e-06,
      "loss": 0.3532,
      "step": 703
    },
    {
      "epoch": 0.0208,
      "grad_norm": 0.5009389519691467,
      "learning_rate": 9.94937781738658e-06,
      "loss": 0.3764,
      "step": 704
    },
    {
      "epoch": 0.021,
      "grad_norm": 0.5231199860572815,
      "learning_rate": 9.948881154538946e-06,
      "loss": 0.339,
      "step": 705
    },
    {
      "epoch": 0.0212,
      "grad_norm": 0.6982216238975525,
      "learning_rate": 9.948382079666756e-06,
      "loss": 0.3538,
      "step": 706
    },
    {
      "epoch": 0.0214,
      "grad_norm": 0.5141116380691528,
      "learning_rate": 9.947880593013256e-06,
      "loss": 0.357,
      "step": 707
    },
    {
      "epoch": 0.0216,
      "grad_norm": 0.6352123618125916,
      "learning_rate": 9.947376694822861e-06,
      "loss": 0.341,
      "step": 708
    },
    {
      "epoch": 0.0218,
      "grad_norm": 0.47832244634628296,
      "learning_rate": 9.946870385341167e-06,
      "loss": 0.3365,
      "step": 709
    },
    {
      "epoch": 0.022,
      "grad_norm": 0.9573586583137512,
      "learning_rate": 9.946361664814942e-06,
      "loss": 0.3649,
      "step": 710
    },
    {
      "epoch": 0.0222,
      "grad_norm": 0.5906776785850525,
      "learning_rate": 9.945850533492132e-06,
      "loss": 0.3601,
      "step": 711
    },
    {
      "epoch": 0.0224,
      "grad_norm": 0.7457024455070496,
      "learning_rate": 9.945336991621854e-06,
      "loss": 0.377,
      "step": 712
    },
    {
      "epoch": 0.0226,
      "grad_norm": 0.5755129456520081,
      "learning_rate": 9.944821039454403e-06,
      "loss": 0.3945,
      "step": 713
    },
    {
      "epoch": 0.0228,
      "grad_norm": 0.6262719035148621,
      "learning_rate": 9.944302677241247e-06,
      "loss": 0.3405,
      "step": 714
    },
    {
      "epoch": 0.023,
      "grad_norm": 0.526841938495636,
      "learning_rate": 9.94378190523503e-06,
      "loss": 0.3486,
      "step": 715
    },
    {
      "epoch": 0.0232,
      "grad_norm": 0.5734097957611084,
      "learning_rate": 9.94325872368957e-06,
      "loss": 0.3776,
      "step": 716
    },
    {
      "epoch": 0.0234,
      "grad_norm": 0.5565292239189148,
      "learning_rate": 9.942733132859861e-06,
      "loss": 0.3162,
      "step": 717
    },
    {
      "epoch": 0.0236,
      "grad_norm": 0.6262229681015015,
      "learning_rate": 9.942205133002067e-06,
      "loss": 0.3492,
      "step": 718
    },
    {
      "epoch": 0.0238,
      "grad_norm": 0.46493783593177795,
      "learning_rate": 9.94167472437353e-06,
      "loss": 0.3827,
      "step": 719
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.617500901222229,
      "learning_rate": 9.941141907232766e-06,
      "loss": 0.3815,
      "step": 720
    },
    {
      "epoch": 0.0242,
      "grad_norm": 0.433040052652359,
      "learning_rate": 9.94060668183946e-06,
      "loss": 0.3417,
      "step": 721
    },
    {
      "epoch": 0.0244,
      "grad_norm": 0.4433918297290802,
      "learning_rate": 9.940069048454478e-06,
      "loss": 0.3161,
      "step": 722
    },
    {
      "epoch": 0.0246,
      "grad_norm": 0.489742636680603,
      "learning_rate": 9.939529007339852e-06,
      "loss": 0.3531,
      "step": 723
    },
    {
      "epoch": 0.0248,
      "grad_norm": 0.5496694445610046,
      "learning_rate": 9.938986558758795e-06,
      "loss": 0.3857,
      "step": 724
    },
    {
      "epoch": 0.025,
      "grad_norm": 0.5275728702545166,
      "learning_rate": 9.938441702975689e-06,
      "loss": 0.3357,
      "step": 725
    },
    {
      "epoch": 0.0252,
      "grad_norm": 0.4730554223060608,
      "learning_rate": 9.937894440256091e-06,
      "loss": 0.3513,
      "step": 726
    },
    {
      "epoch": 0.0254,
      "grad_norm": 0.598097026348114,
      "learning_rate": 9.937344770866727e-06,
      "loss": 0.3444,
      "step": 727
    },
    {
      "epoch": 0.0256,
      "grad_norm": 0.48787403106689453,
      "learning_rate": 9.936792695075502e-06,
      "loss": 0.3564,
      "step": 728
    },
    {
      "epoch": 0.0258,
      "grad_norm": 0.6469987630844116,
      "learning_rate": 9.936238213151491e-06,
      "loss": 0.3703,
      "step": 729
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.7096101641654968,
      "learning_rate": 9.93568132536494e-06,
      "loss": 0.3321,
      "step": 730
    },
    {
      "epoch": 0.0262,
      "grad_norm": 0.5522058606147766,
      "learning_rate": 9.93512203198727e-06,
      "loss": 0.3634,
      "step": 731
    },
    {
      "epoch": 0.0264,
      "grad_norm": 0.5466731190681458,
      "learning_rate": 9.934560333291077e-06,
      "loss": 0.3683,
      "step": 732
    },
    {
      "epoch": 0.0266,
      "grad_norm": 0.6212294697761536,
      "learning_rate": 9.93399622955012e-06,
      "loss": 0.3865,
      "step": 733
    },
    {
      "epoch": 0.0268,
      "grad_norm": 0.6423448324203491,
      "learning_rate": 9.93342972103934e-06,
      "loss": 0.3529,
      "step": 734
    },
    {
      "epoch": 0.027,
      "grad_norm": 0.4934785068035126,
      "learning_rate": 9.932860808034847e-06,
      "loss": 0.3423,
      "step": 735
    },
    {
      "epoch": 0.0272,
      "grad_norm": 0.48161065578460693,
      "learning_rate": 9.932289490813922e-06,
      "loss": 0.3475,
      "step": 736
    },
    {
      "epoch": 0.0274,
      "grad_norm": 0.6466953754425049,
      "learning_rate": 9.931715769655017e-06,
      "loss": 0.3851,
      "step": 737
    },
    {
      "epoch": 0.0276,
      "grad_norm": 0.4517045319080353,
      "learning_rate": 9.931139644837755e-06,
      "loss": 0.3484,
      "step": 738
    },
    {
      "epoch": 0.0278,
      "grad_norm": 0.5961320996284485,
      "learning_rate": 9.930561116642936e-06,
      "loss": 0.3727,
      "step": 739
    },
    {
      "epoch": 0.028,
      "grad_norm": 0.551659107208252,
      "learning_rate": 9.929980185352525e-06,
      "loss": 0.3421,
      "step": 740
    },
    {
      "epoch": 0.0282,
      "grad_norm": 0.616959810256958,
      "learning_rate": 9.929396851249661e-06,
      "loss": 0.3558,
      "step": 741
    },
    {
      "epoch": 0.0284,
      "grad_norm": 0.5367663502693176,
      "learning_rate": 9.928811114618658e-06,
      "loss": 0.3404,
      "step": 742
    },
    {
      "epoch": 0.0286,
      "grad_norm": 0.4423440098762512,
      "learning_rate": 9.928222975744992e-06,
      "loss": 0.3442,
      "step": 743
    },
    {
      "epoch": 0.0288,
      "grad_norm": 0.6967864632606506,
      "learning_rate": 9.927632434915315e-06,
      "loss": 0.3448,
      "step": 744
    },
    {
      "epoch": 0.029,
      "grad_norm": 0.5631203651428223,
      "learning_rate": 9.927039492417452e-06,
      "loss": 0.3703,
      "step": 745
    },
    {
      "epoch": 0.0292,
      "grad_norm": 0.541476845741272,
      "learning_rate": 9.926444148540394e-06,
      "loss": 0.393,
      "step": 746
    },
    {
      "epoch": 0.0294,
      "grad_norm": 0.5587565302848816,
      "learning_rate": 9.925846403574306e-06,
      "loss": 0.3689,
      "step": 747
    },
    {
      "epoch": 0.0296,
      "grad_norm": 0.5124462246894836,
      "learning_rate": 9.925246257810519e-06,
      "loss": 0.3697,
      "step": 748
    },
    {
      "epoch": 0.0298,
      "grad_norm": 0.5333815217018127,
      "learning_rate": 9.92464371154154e-06,
      "loss": 0.3695,
      "step": 749
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5483563542366028,
      "learning_rate": 9.924038765061042e-06,
      "loss": 0.38,
      "step": 750
    },
    {
      "epoch": 0.0302,
      "grad_norm": 0.6113625168800354,
      "learning_rate": 9.923431418663866e-06,
      "loss": 0.3555,
      "step": 751
    },
    {
      "epoch": 0.0304,
      "grad_norm": 0.48165029287338257,
      "learning_rate": 9.922821672646028e-06,
      "loss": 0.3434,
      "step": 752
    },
    {
      "epoch": 0.0306,
      "grad_norm": 0.547150731086731,
      "learning_rate": 9.922209527304709e-06,
      "loss": 0.3855,
      "step": 753
    },
    {
      "epoch": 0.0308,
      "grad_norm": 0.4787239134311676,
      "learning_rate": 9.921594982938262e-06,
      "loss": 0.3781,
      "step": 754
    },
    {
      "epoch": 0.031,
      "grad_norm": 0.4874400496482849,
      "learning_rate": 9.92097803984621e-06,
      "loss": 0.356,
      "step": 755
    },
    {
      "epoch": 0.0312,
      "grad_norm": 0.4914305508136749,
      "learning_rate": 9.920358698329242e-06,
      "loss": 0.3424,
      "step": 756
    },
    {
      "epoch": 0.0314,
      "grad_norm": 0.5431596636772156,
      "learning_rate": 9.919736958689216e-06,
      "loss": 0.3733,
      "step": 757
    },
    {
      "epoch": 0.0316,
      "grad_norm": 0.522655189037323,
      "learning_rate": 9.919112821229165e-06,
      "loss": 0.3702,
      "step": 758
    },
    {
      "epoch": 0.0318,
      "grad_norm": 0.6319332718849182,
      "learning_rate": 9.918486286253279e-06,
      "loss": 0.3578,
      "step": 759
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.7058272957801819,
      "learning_rate": 9.91785735406693e-06,
      "loss": 0.3604,
      "step": 760
    },
    {
      "epoch": 0.0322,
      "grad_norm": 0.4633054733276367,
      "learning_rate": 9.91722602497665e-06,
      "loss": 0.3352,
      "step": 761
    },
    {
      "epoch": 0.0324,
      "grad_norm": 0.5158928036689758,
      "learning_rate": 9.91659229929014e-06,
      "loss": 0.3322,
      "step": 762
    },
    {
      "epoch": 0.0326,
      "grad_norm": 0.47573283314704895,
      "learning_rate": 9.915956177316269e-06,
      "loss": 0.3143,
      "step": 763
    },
    {
      "epoch": 0.0328,
      "grad_norm": 0.8012258410453796,
      "learning_rate": 9.915317659365078e-06,
      "loss": 0.3542,
      "step": 764
    },
    {
      "epoch": 0.033,
      "grad_norm": 0.5783560276031494,
      "learning_rate": 9.914676745747772e-06,
      "loss": 0.3585,
      "step": 765
    },
    {
      "epoch": 0.0332,
      "grad_norm": 0.41114407777786255,
      "learning_rate": 9.914033436776724e-06,
      "loss": 0.3519,
      "step": 766
    },
    {
      "epoch": 0.0334,
      "grad_norm": 0.5225133895874023,
      "learning_rate": 9.913387732765475e-06,
      "loss": 0.3451,
      "step": 767
    },
    {
      "epoch": 0.0336,
      "grad_norm": 0.5642378330230713,
      "learning_rate": 9.912739634028734e-06,
      "loss": 0.3561,
      "step": 768
    },
    {
      "epoch": 0.0338,
      "grad_norm": 0.4602202773094177,
      "learning_rate": 9.912089140882377e-06,
      "loss": 0.3398,
      "step": 769
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.6003661751747131,
      "learning_rate": 9.911436253643445e-06,
      "loss": 0.3592,
      "step": 770
    },
    {
      "epoch": 0.0342,
      "grad_norm": 0.5082271695137024,
      "learning_rate": 9.910780972630146e-06,
      "loss": 0.3366,
      "step": 771
    },
    {
      "epoch": 0.0344,
      "grad_norm": 0.6358324885368347,
      "learning_rate": 9.91012329816186e-06,
      "loss": 0.3931,
      "step": 772
    },
    {
      "epoch": 0.0346,
      "grad_norm": 0.519363522529602,
      "learning_rate": 9.909463230559127e-06,
      "loss": 0.3699,
      "step": 773
    },
    {
      "epoch": 0.0348,
      "grad_norm": 0.49682268500328064,
      "learning_rate": 9.908800770143654e-06,
      "loss": 0.3243,
      "step": 774
    },
    {
      "epoch": 0.035,
      "grad_norm": 0.4546162188053131,
      "learning_rate": 9.908135917238321e-06,
      "loss": 0.3254,
      "step": 775
    },
    {
      "epoch": 0.0352,
      "grad_norm": 0.48771101236343384,
      "learning_rate": 9.907468672167165e-06,
      "loss": 0.3574,
      "step": 776
    },
    {
      "epoch": 0.0354,
      "grad_norm": 0.4736218750476837,
      "learning_rate": 9.906799035255395e-06,
      "loss": 0.3396,
      "step": 777
    },
    {
      "epoch": 0.0356,
      "grad_norm": 0.4813614785671234,
      "learning_rate": 9.906127006829385e-06,
      "loss": 0.3176,
      "step": 778
    },
    {
      "epoch": 0.0358,
      "grad_norm": 0.578108012676239,
      "learning_rate": 9.90545258721667e-06,
      "loss": 0.3856,
      "step": 779
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.5629308223724365,
      "learning_rate": 9.904775776745959e-06,
      "loss": 0.3695,
      "step": 780
    },
    {
      "epoch": 0.0362,
      "grad_norm": 0.5662182569503784,
      "learning_rate": 9.904096575747117e-06,
      "loss": 0.3263,
      "step": 781
    },
    {
      "epoch": 0.0364,
      "grad_norm": 0.39899706840515137,
      "learning_rate": 9.903414984551178e-06,
      "loss": 0.3221,
      "step": 782
    },
    {
      "epoch": 0.0366,
      "grad_norm": 0.578458845615387,
      "learning_rate": 9.902731003490344e-06,
      "loss": 0.3033,
      "step": 783
    },
    {
      "epoch": 0.0368,
      "grad_norm": 0.577378511428833,
      "learning_rate": 9.90204463289798e-06,
      "loss": 0.3541,
      "step": 784
    },
    {
      "epoch": 0.037,
      "grad_norm": 1.3281687498092651,
      "learning_rate": 9.901355873108611e-06,
      "loss": 0.3659,
      "step": 785
    },
    {
      "epoch": 0.0372,
      "grad_norm": 0.5634615421295166,
      "learning_rate": 9.900664724457932e-06,
      "loss": 0.3447,
      "step": 786
    },
    {
      "epoch": 0.0374,
      "grad_norm": 0.5448404550552368,
      "learning_rate": 9.899971187282799e-06,
      "loss": 0.3495,
      "step": 787
    },
    {
      "epoch": 0.0376,
      "grad_norm": 0.4552437365055084,
      "learning_rate": 9.899275261921236e-06,
      "loss": 0.3485,
      "step": 788
    },
    {
      "epoch": 0.0378,
      "grad_norm": 0.44487401843070984,
      "learning_rate": 9.898576948712427e-06,
      "loss": 0.3387,
      "step": 789
    },
    {
      "epoch": 0.038,
      "grad_norm": 0.4723622798919678,
      "learning_rate": 9.89787624799672e-06,
      "loss": 0.362,
      "step": 790
    },
    {
      "epoch": 0.0382,
      "grad_norm": 0.571026623249054,
      "learning_rate": 9.897173160115633e-06,
      "loss": 0.367,
      "step": 791
    },
    {
      "epoch": 0.0384,
      "grad_norm": 0.4734776020050049,
      "learning_rate": 9.896467685411838e-06,
      "loss": 0.3333,
      "step": 792
    },
    {
      "epoch": 0.0386,
      "grad_norm": 0.4624077081680298,
      "learning_rate": 9.895759824229176e-06,
      "loss": 0.3334,
      "step": 793
    },
    {
      "epoch": 0.0388,
      "grad_norm": 0.4947701692581177,
      "learning_rate": 9.89504957691265e-06,
      "loss": 0.3385,
      "step": 794
    },
    {
      "epoch": 0.039,
      "grad_norm": 0.4425688087940216,
      "learning_rate": 9.894336943808426e-06,
      "loss": 0.359,
      "step": 795
    },
    {
      "epoch": 0.0392,
      "grad_norm": 0.6617125272750854,
      "learning_rate": 9.893621925263832e-06,
      "loss": 0.3585,
      "step": 796
    },
    {
      "epoch": 0.0394,
      "grad_norm": 0.5874060988426208,
      "learning_rate": 9.89290452162736e-06,
      "loss": 0.3932,
      "step": 797
    },
    {
      "epoch": 0.0396,
      "grad_norm": 0.49450525641441345,
      "learning_rate": 9.892184733248666e-06,
      "loss": 0.3756,
      "step": 798
    },
    {
      "epoch": 0.0398,
      "grad_norm": 0.7197376489639282,
      "learning_rate": 9.891462560478562e-06,
      "loss": 0.3345,
      "step": 799
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5487624406814575,
      "learning_rate": 9.890738003669029e-06,
      "loss": 0.3423,
      "step": 800
    },
    {
      "epoch": 0.0402,
      "grad_norm": 0.7047106027603149,
      "learning_rate": 9.890011063173207e-06,
      "loss": 0.3675,
      "step": 801
    },
    {
      "epoch": 0.0404,
      "grad_norm": 0.5573054552078247,
      "learning_rate": 9.889281739345395e-06,
      "loss": 0.3396,
      "step": 802
    },
    {
      "epoch": 0.0406,
      "grad_norm": 0.6040503978729248,
      "learning_rate": 9.88855003254106e-06,
      "loss": 0.3616,
      "step": 803
    },
    {
      "epoch": 0.0408,
      "grad_norm": 0.6594284176826477,
      "learning_rate": 9.887815943116827e-06,
      "loss": 0.363,
      "step": 804
    },
    {
      "epoch": 0.041,
      "grad_norm": 0.5454000234603882,
      "learning_rate": 9.887079471430481e-06,
      "loss": 0.3884,
      "step": 805
    },
    {
      "epoch": 0.0412,
      "grad_norm": 0.4665381610393524,
      "learning_rate": 9.886340617840968e-06,
      "loss": 0.3366,
      "step": 806
    },
    {
      "epoch": 0.0414,
      "grad_norm": 0.6960157752037048,
      "learning_rate": 9.8855993827084e-06,
      "loss": 0.339,
      "step": 807
    },
    {
      "epoch": 0.0416,
      "grad_norm": 0.43395528197288513,
      "learning_rate": 9.884855766394041e-06,
      "loss": 0.3062,
      "step": 808
    },
    {
      "epoch": 0.0418,
      "grad_norm": 0.548009991645813,
      "learning_rate": 9.884109769260326e-06,
      "loss": 0.3719,
      "step": 809
    },
    {
      "epoch": 0.042,
      "grad_norm": 4.653844356536865,
      "learning_rate": 9.883361391670841e-06,
      "loss": 0.3165,
      "step": 810
    },
    {
      "epoch": 0.0422,
      "grad_norm": 0.6159537434577942,
      "learning_rate": 9.882610633990337e-06,
      "loss": 0.3911,
      "step": 811
    },
    {
      "epoch": 0.0424,
      "grad_norm": 0.7428053021430969,
      "learning_rate": 9.881857496584726e-06,
      "loss": 0.3511,
      "step": 812
    },
    {
      "epoch": 0.0426,
      "grad_norm": 0.5820620059967041,
      "learning_rate": 9.881101979821075e-06,
      "loss": 0.395,
      "step": 813
    },
    {
      "epoch": 0.0428,
      "grad_norm": 0.7245660424232483,
      "learning_rate": 9.880344084067616e-06,
      "loss": 0.3761,
      "step": 814
    },
    {
      "epoch": 0.043,
      "grad_norm": 0.5220029950141907,
      "learning_rate": 9.879583809693737e-06,
      "loss": 0.3646,
      "step": 815
    },
    {
      "epoch": 0.0432,
      "grad_norm": 0.8018655776977539,
      "learning_rate": 9.878821157069988e-06,
      "loss": 0.364,
      "step": 816
    },
    {
      "epoch": 0.0434,
      "grad_norm": 0.6768513321876526,
      "learning_rate": 9.878056126568077e-06,
      "loss": 0.3675,
      "step": 817
    },
    {
      "epoch": 0.0436,
      "grad_norm": 0.6403437852859497,
      "learning_rate": 9.877288718560866e-06,
      "loss": 0.3379,
      "step": 818
    },
    {
      "epoch": 0.0438,
      "grad_norm": 0.44020789861679077,
      "learning_rate": 9.876518933422385e-06,
      "loss": 0.3808,
      "step": 819
    },
    {
      "epoch": 0.044,
      "grad_norm": 0.4595296084880829,
      "learning_rate": 9.875746771527817e-06,
      "loss": 0.3212,
      "step": 820
    },
    {
      "epoch": 0.0442,
      "grad_norm": 0.53700852394104,
      "learning_rate": 9.874972233253503e-06,
      "loss": 0.3323,
      "step": 821
    },
    {
      "epoch": 0.0444,
      "grad_norm": 0.47373753786087036,
      "learning_rate": 9.874195318976945e-06,
      "loss": 0.3715,
      "step": 822
    },
    {
      "epoch": 0.0446,
      "grad_norm": 0.4782754182815552,
      "learning_rate": 9.873416029076801e-06,
      "loss": 0.3709,
      "step": 823
    },
    {
      "epoch": 0.0448,
      "grad_norm": 0.5527014136314392,
      "learning_rate": 9.872634363932887e-06,
      "loss": 0.3525,
      "step": 824
    },
    {
      "epoch": 0.045,
      "grad_norm": 0.45495885610580444,
      "learning_rate": 9.871850323926178e-06,
      "loss": 0.3783,
      "step": 825
    },
    {
      "epoch": 0.0452,
      "grad_norm": 0.49007779359817505,
      "learning_rate": 9.871063909438803e-06,
      "loss": 0.3393,
      "step": 826
    },
    {
      "epoch": 0.0454,
      "grad_norm": 0.5319245457649231,
      "learning_rate": 9.870275120854055e-06,
      "loss": 0.3946,
      "step": 827
    },
    {
      "epoch": 0.0456,
      "grad_norm": 0.5281378626823425,
      "learning_rate": 9.869483958556376e-06,
      "loss": 0.3629,
      "step": 828
    },
    {
      "epoch": 0.0458,
      "grad_norm": 0.562746524810791,
      "learning_rate": 9.868690422931372e-06,
      "loss": 0.3455,
      "step": 829
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.42057880759239197,
      "learning_rate": 9.867894514365802e-06,
      "loss": 0.3298,
      "step": 830
    },
    {
      "epoch": 0.0462,
      "grad_norm": 0.44129714369773865,
      "learning_rate": 9.867096233247581e-06,
      "loss": 0.3517,
      "step": 831
    },
    {
      "epoch": 0.0464,
      "grad_norm": 0.5454751253128052,
      "learning_rate": 9.866295579965782e-06,
      "loss": 0.3432,
      "step": 832
    },
    {
      "epoch": 0.0466,
      "grad_norm": 0.5025976300239563,
      "learning_rate": 9.865492554910634e-06,
      "loss": 0.3599,
      "step": 833
    },
    {
      "epoch": 0.0468,
      "grad_norm": 0.4312964677810669,
      "learning_rate": 9.86468715847352e-06,
      "loss": 0.3482,
      "step": 834
    },
    {
      "epoch": 0.047,
      "grad_norm": 0.5484687089920044,
      "learning_rate": 9.863879391046985e-06,
      "loss": 0.3368,
      "step": 835
    },
    {
      "epoch": 0.0472,
      "grad_norm": 0.43831753730773926,
      "learning_rate": 9.863069253024719e-06,
      "loss": 0.365,
      "step": 836
    },
    {
      "epoch": 0.0474,
      "grad_norm": 0.4563218057155609,
      "learning_rate": 9.862256744801576e-06,
      "loss": 0.33,
      "step": 837
    },
    {
      "epoch": 0.0476,
      "grad_norm": 0.5891410708427429,
      "learning_rate": 9.861441866773564e-06,
      "loss": 0.3333,
      "step": 838
    },
    {
      "epoch": 0.0478,
      "grad_norm": 0.5989771485328674,
      "learning_rate": 9.860624619337844e-06,
      "loss": 0.3715,
      "step": 839
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.589015781879425,
      "learning_rate": 9.859805002892733e-06,
      "loss": 0.3596,
      "step": 840
    },
    {
      "epoch": 0.0482,
      "grad_norm": 0.45189985632896423,
      "learning_rate": 9.8589830178377e-06,
      "loss": 0.3518,
      "step": 841
    },
    {
      "epoch": 0.0484,
      "grad_norm": 0.5500615835189819,
      "learning_rate": 9.85815866457337e-06,
      "loss": 0.388,
      "step": 842
    },
    {
      "epoch": 0.0486,
      "grad_norm": 0.43995723128318787,
      "learning_rate": 9.857331943501527e-06,
      "loss": 0.3195,
      "step": 843
    },
    {
      "epoch": 0.0488,
      "grad_norm": 0.5426294207572937,
      "learning_rate": 9.856502855025101e-06,
      "loss": 0.3701,
      "step": 844
    },
    {
      "epoch": 0.049,
      "grad_norm": 0.44612541794776917,
      "learning_rate": 9.85567139954818e-06,
      "loss": 0.321,
      "step": 845
    },
    {
      "epoch": 0.0492,
      "grad_norm": 0.5911232829093933,
      "learning_rate": 9.854837577476008e-06,
      "loss": 0.3522,
      "step": 846
    },
    {
      "epoch": 0.0494,
      "grad_norm": 0.556076169013977,
      "learning_rate": 9.854001389214979e-06,
      "loss": 0.3721,
      "step": 847
    },
    {
      "epoch": 0.0496,
      "grad_norm": 0.7157875299453735,
      "learning_rate": 9.853162835172638e-06,
      "loss": 0.3496,
      "step": 848
    },
    {
      "epoch": 0.0498,
      "grad_norm": 0.40269339084625244,
      "learning_rate": 9.852321915757688e-06,
      "loss": 0.3218,
      "step": 849
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4933511018753052,
      "learning_rate": 9.851478631379982e-06,
      "loss": 0.3533,
      "step": 850
    },
    {
      "epoch": 0.0502,
      "grad_norm": 0.40543311834335327,
      "learning_rate": 9.85063298245053e-06,
      "loss": 0.3109,
      "step": 851
    },
    {
      "epoch": 0.0504,
      "grad_norm": 0.5443208813667297,
      "learning_rate": 9.849784969381488e-06,
      "loss": 0.333,
      "step": 852
    },
    {
      "epoch": 0.0506,
      "grad_norm": 0.544452965259552,
      "learning_rate": 9.848934592586165e-06,
      "loss": 0.352,
      "step": 853
    },
    {
      "epoch": 0.0508,
      "grad_norm": 0.47437989711761475,
      "learning_rate": 9.84808185247903e-06,
      "loss": 0.3276,
      "step": 854
    },
    {
      "epoch": 0.051,
      "grad_norm": 0.5918927192687988,
      "learning_rate": 9.847226749475696e-06,
      "loss": 0.3711,
      "step": 855
    },
    {
      "epoch": 0.0512,
      "grad_norm": 0.4360045790672302,
      "learning_rate": 9.846369283992927e-06,
      "loss": 0.3383,
      "step": 856
    },
    {
      "epoch": 0.0514,
      "grad_norm": 0.5080694556236267,
      "learning_rate": 9.845509456448642e-06,
      "loss": 0.3427,
      "step": 857
    },
    {
      "epoch": 0.0516,
      "grad_norm": 0.5121091604232788,
      "learning_rate": 9.844647267261915e-06,
      "loss": 0.3498,
      "step": 858
    },
    {
      "epoch": 0.0518,
      "grad_norm": 0.7771952152252197,
      "learning_rate": 9.843782716852963e-06,
      "loss": 0.3488,
      "step": 859
    },
    {
      "epoch": 0.052,
      "grad_norm": 0.49918562173843384,
      "learning_rate": 9.842915805643156e-06,
      "loss": 0.3532,
      "step": 860
    },
    {
      "epoch": 0.0522,
      "grad_norm": 0.5318511128425598,
      "learning_rate": 9.84204653405502e-06,
      "loss": 0.3697,
      "step": 861
    },
    {
      "epoch": 0.0524,
      "grad_norm": 0.5993008613586426,
      "learning_rate": 9.841174902512223e-06,
      "loss": 0.3481,
      "step": 862
    },
    {
      "epoch": 0.0526,
      "grad_norm": 0.5897778868675232,
      "learning_rate": 9.84030091143959e-06,
      "loss": 0.3603,
      "step": 863
    },
    {
      "epoch": 0.0528,
      "grad_norm": 0.6109820008277893,
      "learning_rate": 9.839424561263094e-06,
      "loss": 0.3414,
      "step": 864
    },
    {
      "epoch": 0.053,
      "grad_norm": 0.544138491153717,
      "learning_rate": 9.838545852409857e-06,
      "loss": 0.3546,
      "step": 865
    },
    {
      "epoch": 0.0532,
      "grad_norm": 0.5051282644271851,
      "learning_rate": 9.83766478530815e-06,
      "loss": 0.3748,
      "step": 866
    },
    {
      "epoch": 0.0534,
      "grad_norm": 0.5266006588935852,
      "learning_rate": 9.836781360387396e-06,
      "loss": 0.3842,
      "step": 867
    },
    {
      "epoch": 0.0536,
      "grad_norm": 0.5562833547592163,
      "learning_rate": 9.835895578078165e-06,
      "loss": 0.3647,
      "step": 868
    },
    {
      "epoch": 0.0538,
      "grad_norm": 0.6228040456771851,
      "learning_rate": 9.835007438812177e-06,
      "loss": 0.3682,
      "step": 869
    },
    {
      "epoch": 0.054,
      "grad_norm": 0.546684980392456,
      "learning_rate": 9.834116943022299e-06,
      "loss": 0.355,
      "step": 870
    },
    {
      "epoch": 0.0542,
      "grad_norm": 0.5181378722190857,
      "learning_rate": 9.833224091142548e-06,
      "loss": 0.3616,
      "step": 871
    },
    {
      "epoch": 0.0544,
      "grad_norm": 0.5290752053260803,
      "learning_rate": 9.832328883608088e-06,
      "loss": 0.3318,
      "step": 872
    },
    {
      "epoch": 0.0546,
      "grad_norm": 0.6761295795440674,
      "learning_rate": 9.831431320855235e-06,
      "loss": 0.3514,
      "step": 873
    },
    {
      "epoch": 0.0548,
      "grad_norm": 0.49282899498939514,
      "learning_rate": 9.830531403321451e-06,
      "loss": 0.3127,
      "step": 874
    },
    {
      "epoch": 0.055,
      "grad_norm": 0.5205821394920349,
      "learning_rate": 9.829629131445342e-06,
      "loss": 0.3714,
      "step": 875
    },
    {
      "epoch": 0.0552,
      "grad_norm": 0.4778885245323181,
      "learning_rate": 9.828724505666664e-06,
      "loss": 0.3342,
      "step": 876
    },
    {
      "epoch": 0.0554,
      "grad_norm": 0.5599461197853088,
      "learning_rate": 9.827817526426324e-06,
      "loss": 0.3599,
      "step": 877
    },
    {
      "epoch": 0.0556,
      "grad_norm": 0.5918222665786743,
      "learning_rate": 9.82690819416637e-06,
      "loss": 0.364,
      "step": 878
    },
    {
      "epoch": 0.0558,
      "grad_norm": 0.457160085439682,
      "learning_rate": 9.825996509330001e-06,
      "loss": 0.3132,
      "step": 879
    },
    {
      "epoch": 0.056,
      "grad_norm": 0.45433831214904785,
      "learning_rate": 9.825082472361558e-06,
      "loss": 0.3516,
      "step": 880
    },
    {
      "epoch": 0.0562,
      "grad_norm": 2.2120816707611084,
      "learning_rate": 9.824166083706534e-06,
      "loss": 0.3274,
      "step": 881
    },
    {
      "epoch": 0.0564,
      "grad_norm": 0.6137505173683167,
      "learning_rate": 9.823247343811567e-06,
      "loss": 0.3592,
      "step": 882
    },
    {
      "epoch": 0.0566,
      "grad_norm": 0.4543190598487854,
      "learning_rate": 9.822326253124436e-06,
      "loss": 0.3405,
      "step": 883
    },
    {
      "epoch": 0.0568,
      "grad_norm": 0.4767962396144867,
      "learning_rate": 9.821402812094074e-06,
      "loss": 0.3597,
      "step": 884
    },
    {
      "epoch": 0.057,
      "grad_norm": 0.5105297565460205,
      "learning_rate": 9.82047702117055e-06,
      "loss": 0.3515,
      "step": 885
    },
    {
      "epoch": 0.0572,
      "grad_norm": 0.5078163146972656,
      "learning_rate": 9.819548880805087e-06,
      "loss": 0.3741,
      "step": 886
    },
    {
      "epoch": 0.0574,
      "grad_norm": 0.43719884753227234,
      "learning_rate": 9.81861839145005e-06,
      "loss": 0.3051,
      "step": 887
    },
    {
      "epoch": 0.0576,
      "grad_norm": 0.6590076088905334,
      "learning_rate": 9.817685553558945e-06,
      "loss": 0.3633,
      "step": 888
    },
    {
      "epoch": 0.0578,
      "grad_norm": 0.5049616694450378,
      "learning_rate": 9.816750367586424e-06,
      "loss": 0.3527,
      "step": 889
    },
    {
      "epoch": 0.058,
      "grad_norm": 0.6090893149375916,
      "learning_rate": 9.815812833988292e-06,
      "loss": 0.3733,
      "step": 890
    },
    {
      "epoch": 0.0582,
      "grad_norm": 0.4779634177684784,
      "learning_rate": 9.814872953221487e-06,
      "loss": 0.3461,
      "step": 891
    },
    {
      "epoch": 0.0584,
      "grad_norm": 0.4599948227405548,
      "learning_rate": 9.813930725744095e-06,
      "loss": 0.3588,
      "step": 892
    },
    {
      "epoch": 0.0586,
      "grad_norm": 0.5123443007469177,
      "learning_rate": 9.812986152015349e-06,
      "loss": 0.3287,
      "step": 893
    },
    {
      "epoch": 0.0588,
      "grad_norm": 0.5607577562332153,
      "learning_rate": 9.81203923249562e-06,
      "loss": 0.3743,
      "step": 894
    },
    {
      "epoch": 0.059,
      "grad_norm": 0.43641480803489685,
      "learning_rate": 9.811089967646427e-06,
      "loss": 0.3722,
      "step": 895
    },
    {
      "epoch": 0.0592,
      "grad_norm": 0.4887502193450928,
      "learning_rate": 9.81013835793043e-06,
      "loss": 0.3314,
      "step": 896
    },
    {
      "epoch": 0.0594,
      "grad_norm": 0.9616177082061768,
      "learning_rate": 9.809184403811432e-06,
      "loss": 0.3842,
      "step": 897
    },
    {
      "epoch": 0.0596,
      "grad_norm": 0.42374488711357117,
      "learning_rate": 9.808228105754378e-06,
      "loss": 0.3064,
      "step": 898
    },
    {
      "epoch": 0.0598,
      "grad_norm": 0.739493727684021,
      "learning_rate": 9.807269464225355e-06,
      "loss": 0.3491,
      "step": 899
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5326677560806274,
      "learning_rate": 9.806308479691595e-06,
      "loss": 0.3746,
      "step": 900
    },
    {
      "epoch": 0.0602,
      "grad_norm": 0.4692981541156769,
      "learning_rate": 9.80534515262147e-06,
      "loss": 0.331,
      "step": 901
    },
    {
      "epoch": 0.0604,
      "grad_norm": 0.6925827860832214,
      "learning_rate": 9.804379483484493e-06,
      "loss": 0.3514,
      "step": 902
    },
    {
      "epoch": 0.0606,
      "grad_norm": 0.5632583498954773,
      "learning_rate": 9.803411472751321e-06,
      "loss": 0.3374,
      "step": 903
    },
    {
      "epoch": 0.0608,
      "grad_norm": 0.45331987738609314,
      "learning_rate": 9.80244112089375e-06,
      "loss": 0.3542,
      "step": 904
    },
    {
      "epoch": 0.061,
      "grad_norm": 0.4388485848903656,
      "learning_rate": 9.801468428384716e-06,
      "loss": 0.3388,
      "step": 905
    },
    {
      "epoch": 0.0612,
      "grad_norm": 0.46028193831443787,
      "learning_rate": 9.8004933956983e-06,
      "loss": 0.3491,
      "step": 906
    },
    {
      "epoch": 0.0614,
      "grad_norm": 0.5567423105239868,
      "learning_rate": 9.799516023309719e-06,
      "loss": 0.3426,
      "step": 907
    },
    {
      "epoch": 0.0616,
      "grad_norm": 0.5695452690124512,
      "learning_rate": 9.798536311695334e-06,
      "loss": 0.3465,
      "step": 908
    },
    {
      "epoch": 0.0618,
      "grad_norm": 0.44842109084129333,
      "learning_rate": 9.797554261332644e-06,
      "loss": 0.3331,
      "step": 909
    },
    {
      "epoch": 0.062,
      "grad_norm": 0.4669300317764282,
      "learning_rate": 9.796569872700287e-06,
      "loss": 0.3753,
      "step": 910
    },
    {
      "epoch": 0.0622,
      "grad_norm": 0.566696286201477,
      "learning_rate": 9.795583146278047e-06,
      "loss": 0.3329,
      "step": 911
    },
    {
      "epoch": 0.0624,
      "grad_norm": 0.5538137555122375,
      "learning_rate": 9.794594082546835e-06,
      "loss": 0.3357,
      "step": 912
    },
    {
      "epoch": 0.0626,
      "grad_norm": 0.4893159866333008,
      "learning_rate": 9.793602681988714e-06,
      "loss": 0.3392,
      "step": 913
    },
    {
      "epoch": 0.0628,
      "grad_norm": 0.4208252727985382,
      "learning_rate": 9.79260894508688e-06,
      "loss": 0.3013,
      "step": 914
    },
    {
      "epoch": 0.063,
      "grad_norm": 0.5416717529296875,
      "learning_rate": 9.791612872325667e-06,
      "loss": 0.3668,
      "step": 915
    },
    {
      "epoch": 0.0632,
      "grad_norm": 0.49242863059043884,
      "learning_rate": 9.79061446419055e-06,
      "loss": 0.3555,
      "step": 916
    },
    {
      "epoch": 0.0634,
      "grad_norm": 0.5123218894004822,
      "learning_rate": 9.789613721168138e-06,
      "loss": 0.3778,
      "step": 917
    },
    {
      "epoch": 0.0636,
      "grad_norm": 0.4970109462738037,
      "learning_rate": 9.788610643746184e-06,
      "loss": 0.3322,
      "step": 918
    },
    {
      "epoch": 0.0638,
      "grad_norm": 0.49123039841651917,
      "learning_rate": 9.787605232413575e-06,
      "loss": 0.3426,
      "step": 919
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.5318288803100586,
      "learning_rate": 9.786597487660336e-06,
      "loss": 0.3345,
      "step": 920
    },
    {
      "epoch": 0.0642,
      "grad_norm": 0.45868247747421265,
      "learning_rate": 9.785587409977632e-06,
      "loss": 0.3418,
      "step": 921
    },
    {
      "epoch": 0.0644,
      "grad_norm": 0.7267519235610962,
      "learning_rate": 9.784574999857757e-06,
      "loss": 0.3574,
      "step": 922
    },
    {
      "epoch": 0.0646,
      "grad_norm": 0.43025898933410645,
      "learning_rate": 9.783560257794153e-06,
      "loss": 0.34,
      "step": 923
    },
    {
      "epoch": 0.0648,
      "grad_norm": 0.43988820910453796,
      "learning_rate": 9.78254318428139e-06,
      "loss": 0.3401,
      "step": 924
    },
    {
      "epoch": 0.065,
      "grad_norm": 0.8498095870018005,
      "learning_rate": 9.781523779815178e-06,
      "loss": 0.3627,
      "step": 925
    },
    {
      "epoch": 0.0652,
      "grad_norm": 0.4160510003566742,
      "learning_rate": 9.780502044892363e-06,
      "loss": 0.344,
      "step": 926
    },
    {
      "epoch": 0.0654,
      "grad_norm": 0.5060371160507202,
      "learning_rate": 9.779477980010924e-06,
      "loss": 0.3905,
      "step": 927
    },
    {
      "epoch": 0.0656,
      "grad_norm": 0.5612655282020569,
      "learning_rate": 9.778451585669982e-06,
      "loss": 0.3908,
      "step": 928
    },
    {
      "epoch": 0.0658,
      "grad_norm": 0.49234381318092346,
      "learning_rate": 9.777422862369782e-06,
      "loss": 0.355,
      "step": 929
    },
    {
      "epoch": 0.066,
      "grad_norm": 0.47796159982681274,
      "learning_rate": 9.776391810611719e-06,
      "loss": 0.3738,
      "step": 930
    },
    {
      "epoch": 0.0662,
      "grad_norm": 0.4581078886985779,
      "learning_rate": 9.775358430898311e-06,
      "loss": 0.3497,
      "step": 931
    },
    {
      "epoch": 0.0664,
      "grad_norm": 0.45687854290008545,
      "learning_rate": 9.774322723733216e-06,
      "loss": 0.3394,
      "step": 932
    },
    {
      "epoch": 0.0666,
      "grad_norm": 0.5134704113006592,
      "learning_rate": 9.773284689621223e-06,
      "loss": 0.3607,
      "step": 933
    },
    {
      "epoch": 0.0668,
      "grad_norm": 0.5391189455986023,
      "learning_rate": 9.772244329068261e-06,
      "loss": 0.3255,
      "step": 934
    },
    {
      "epoch": 0.067,
      "grad_norm": 0.49035733938217163,
      "learning_rate": 9.771201642581384e-06,
      "loss": 0.3613,
      "step": 935
    },
    {
      "epoch": 0.0672,
      "grad_norm": 0.5716395974159241,
      "learning_rate": 9.77015663066879e-06,
      "loss": 0.3986,
      "step": 936
    },
    {
      "epoch": 0.0674,
      "grad_norm": 0.6096753478050232,
      "learning_rate": 9.769109293839803e-06,
      "loss": 0.3796,
      "step": 937
    },
    {
      "epoch": 0.0676,
      "grad_norm": 0.5207598209381104,
      "learning_rate": 9.768059632604881e-06,
      "loss": 0.3753,
      "step": 938
    },
    {
      "epoch": 0.0678,
      "grad_norm": 0.48279574513435364,
      "learning_rate": 9.767007647475618e-06,
      "loss": 0.3497,
      "step": 939
    },
    {
      "epoch": 0.068,
      "grad_norm": 0.6509814262390137,
      "learning_rate": 9.765953338964736e-06,
      "loss": 0.3631,
      "step": 940
    },
    {
      "epoch": 0.0682,
      "grad_norm": 0.520702064037323,
      "learning_rate": 9.764896707586095e-06,
      "loss": 0.3521,
      "step": 941
    },
    {
      "epoch": 0.0684,
      "grad_norm": 0.4432623088359833,
      "learning_rate": 9.763837753854684e-06,
      "loss": 0.3191,
      "step": 942
    },
    {
      "epoch": 0.0686,
      "grad_norm": 0.586807906627655,
      "learning_rate": 9.762776478286622e-06,
      "loss": 0.3545,
      "step": 943
    },
    {
      "epoch": 0.0688,
      "grad_norm": 0.4631580412387848,
      "learning_rate": 9.761712881399164e-06,
      "loss": 0.3682,
      "step": 944
    },
    {
      "epoch": 0.069,
      "grad_norm": 0.4819163978099823,
      "learning_rate": 9.760646963710694e-06,
      "loss": 0.3543,
      "step": 945
    },
    {
      "epoch": 0.0692,
      "grad_norm": 0.4699559509754181,
      "learning_rate": 9.759578725740726e-06,
      "loss": 0.39,
      "step": 946
    },
    {
      "epoch": 0.0694,
      "grad_norm": 0.44822317361831665,
      "learning_rate": 9.758508168009908e-06,
      "loss": 0.358,
      "step": 947
    },
    {
      "epoch": 0.0696,
      "grad_norm": 0.43810558319091797,
      "learning_rate": 9.757435291040016e-06,
      "loss": 0.3119,
      "step": 948
    },
    {
      "epoch": 0.0698,
      "grad_norm": 0.4554559290409088,
      "learning_rate": 9.756360095353957e-06,
      "loss": 0.3528,
      "step": 949
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0522255897521973,
      "learning_rate": 9.755282581475769e-06,
      "loss": 0.3352,
      "step": 950
    },
    {
      "epoch": 0.0702,
      "grad_norm": 0.596693217754364,
      "learning_rate": 9.754202749930618e-06,
      "loss": 0.3179,
      "step": 951
    },
    {
      "epoch": 0.0704,
      "grad_norm": 0.4828733503818512,
      "learning_rate": 9.7531206012448e-06,
      "loss": 0.3882,
      "step": 952
    },
    {
      "epoch": 0.0706,
      "grad_norm": 0.5270678997039795,
      "learning_rate": 9.752036135945743e-06,
      "loss": 0.3667,
      "step": 953
    },
    {
      "epoch": 0.0708,
      "grad_norm": 0.5387672185897827,
      "learning_rate": 9.750949354562006e-06,
      "loss": 0.3366,
      "step": 954
    },
    {
      "epoch": 0.071,
      "grad_norm": 0.4902909994125366,
      "learning_rate": 9.749860257623262e-06,
      "loss": 0.3245,
      "step": 955
    },
    {
      "epoch": 0.0712,
      "grad_norm": 0.484833687543869,
      "learning_rate": 9.748768845660335e-06,
      "loss": 0.3763,
      "step": 956
    },
    {
      "epoch": 0.0714,
      "grad_norm": 0.6408721804618835,
      "learning_rate": 9.74767511920516e-06,
      "loss": 0.356,
      "step": 957
    },
    {
      "epoch": 0.0716,
      "grad_norm": 1.179010033607483,
      "learning_rate": 9.746579078790808e-06,
      "loss": 0.354,
      "step": 958
    },
    {
      "epoch": 0.0718,
      "grad_norm": 0.4905582070350647,
      "learning_rate": 9.745480724951473e-06,
      "loss": 0.3761,
      "step": 959
    },
    {
      "epoch": 0.072,
      "grad_norm": 0.47172144055366516,
      "learning_rate": 9.744380058222483e-06,
      "loss": 0.3582,
      "step": 960
    },
    {
      "epoch": 0.0722,
      "grad_norm": 0.6057941317558289,
      "learning_rate": 9.743277079140288e-06,
      "loss": 0.3578,
      "step": 961
    },
    {
      "epoch": 0.0724,
      "grad_norm": 0.5409473180770874,
      "learning_rate": 9.742171788242468e-06,
      "loss": 0.3586,
      "step": 962
    },
    {
      "epoch": 0.0726,
      "grad_norm": 0.4954855740070343,
      "learning_rate": 9.741064186067723e-06,
      "loss": 0.3525,
      "step": 963
    },
    {
      "epoch": 0.0728,
      "grad_norm": 0.48136821389198303,
      "learning_rate": 9.739954273155892e-06,
      "loss": 0.3458,
      "step": 964
    },
    {
      "epoch": 0.073,
      "grad_norm": 0.5001223087310791,
      "learning_rate": 9.73884205004793e-06,
      "loss": 0.3368,
      "step": 965
    },
    {
      "epoch": 0.0732,
      "grad_norm": 0.4765426516532898,
      "learning_rate": 9.73772751728592e-06,
      "loss": 0.4035,
      "step": 966
    },
    {
      "epoch": 0.0734,
      "grad_norm": 0.5063475370407104,
      "learning_rate": 9.736610675413073e-06,
      "loss": 0.3712,
      "step": 967
    },
    {
      "epoch": 0.0736,
      "grad_norm": 0.5638390779495239,
      "learning_rate": 9.735491524973723e-06,
      "loss": 0.3617,
      "step": 968
    },
    {
      "epoch": 0.0738,
      "grad_norm": 0.5299999117851257,
      "learning_rate": 9.73437006651333e-06,
      "loss": 0.396,
      "step": 969
    },
    {
      "epoch": 0.074,
      "grad_norm": 0.5311089754104614,
      "learning_rate": 9.733246300578482e-06,
      "loss": 0.3468,
      "step": 970
    },
    {
      "epoch": 0.0742,
      "grad_norm": 0.4778166115283966,
      "learning_rate": 9.732120227716887e-06,
      "loss": 0.3228,
      "step": 971
    },
    {
      "epoch": 0.0744,
      "grad_norm": 0.6371737718582153,
      "learning_rate": 9.73099184847738e-06,
      "loss": 0.3463,
      "step": 972
    },
    {
      "epoch": 0.0746,
      "grad_norm": 0.5534994006156921,
      "learning_rate": 9.72986116340992e-06,
      "loss": 0.3477,
      "step": 973
    },
    {
      "epoch": 0.0748,
      "grad_norm": 0.6577566862106323,
      "learning_rate": 9.728728173065584e-06,
      "loss": 0.4319,
      "step": 974
    },
    {
      "epoch": 0.075,
      "grad_norm": 0.4764620065689087,
      "learning_rate": 9.727592877996585e-06,
      "loss": 0.3641,
      "step": 975
    },
    {
      "epoch": 0.0752,
      "grad_norm": 0.48566463589668274,
      "learning_rate": 9.726455278756249e-06,
      "loss": 0.3784,
      "step": 976
    },
    {
      "epoch": 0.0754,
      "grad_norm": 0.43354105949401855,
      "learning_rate": 9.725315375899025e-06,
      "loss": 0.3572,
      "step": 977
    },
    {
      "epoch": 0.0756,
      "grad_norm": 0.4650414288043976,
      "learning_rate": 9.724173169980492e-06,
      "loss": 0.3501,
      "step": 978
    },
    {
      "epoch": 0.0758,
      "grad_norm": 0.4581211805343628,
      "learning_rate": 9.723028661557345e-06,
      "loss": 0.3382,
      "step": 979
    },
    {
      "epoch": 0.076,
      "grad_norm": 0.4273165166378021,
      "learning_rate": 9.721881851187406e-06,
      "loss": 0.3207,
      "step": 980
    },
    {
      "epoch": 0.0762,
      "grad_norm": 0.6024993062019348,
      "learning_rate": 9.720732739429614e-06,
      "loss": 0.3306,
      "step": 981
    },
    {
      "epoch": 0.0764,
      "grad_norm": 0.5830469131469727,
      "learning_rate": 9.719581326844033e-06,
      "loss": 0.3984,
      "step": 982
    },
    {
      "epoch": 0.0766,
      "grad_norm": 0.5653430223464966,
      "learning_rate": 9.718427613991848e-06,
      "loss": 0.3375,
      "step": 983
    },
    {
      "epoch": 0.0768,
      "grad_norm": 0.49985766410827637,
      "learning_rate": 9.717271601435363e-06,
      "loss": 0.3436,
      "step": 984
    },
    {
      "epoch": 0.077,
      "grad_norm": 0.602556586265564,
      "learning_rate": 9.716113289738005e-06,
      "loss": 0.3382,
      "step": 985
    },
    {
      "epoch": 0.0772,
      "grad_norm": 0.4498375952243805,
      "learning_rate": 9.714952679464324e-06,
      "loss": 0.346,
      "step": 986
    },
    {
      "epoch": 0.0774,
      "grad_norm": 0.5203672647476196,
      "learning_rate": 9.713789771179983e-06,
      "loss": 0.3738,
      "step": 987
    },
    {
      "epoch": 0.0776,
      "grad_norm": 0.48499685525894165,
      "learning_rate": 9.712624565451772e-06,
      "loss": 0.3573,
      "step": 988
    },
    {
      "epoch": 0.0778,
      "grad_norm": 0.5018429160118103,
      "learning_rate": 9.711457062847596e-06,
      "loss": 0.3516,
      "step": 989
    },
    {
      "epoch": 0.078,
      "grad_norm": 0.5175287127494812,
      "learning_rate": 9.710287263936485e-06,
      "loss": 0.3726,
      "step": 990
    },
    {
      "epoch": 0.0782,
      "grad_norm": 0.568381667137146,
      "learning_rate": 9.709115169288582e-06,
      "loss": 0.3276,
      "step": 991
    },
    {
      "epoch": 0.0784,
      "grad_norm": 0.4742743968963623,
      "learning_rate": 9.707940779475151e-06,
      "loss": 0.3486,
      "step": 992
    },
    {
      "epoch": 0.0786,
      "grad_norm": 0.4560515284538269,
      "learning_rate": 9.706764095068579e-06,
      "loss": 0.3358,
      "step": 993
    },
    {
      "epoch": 0.0788,
      "grad_norm": 0.5308396816253662,
      "learning_rate": 9.705585116642364e-06,
      "loss": 0.3565,
      "step": 994
    },
    {
      "epoch": 0.079,
      "grad_norm": 0.4934135675430298,
      "learning_rate": 9.704403844771128e-06,
      "loss": 0.2929,
      "step": 995
    },
    {
      "epoch": 0.0792,
      "grad_norm": 0.5516948699951172,
      "learning_rate": 9.703220280030607e-06,
      "loss": 0.3526,
      "step": 996
    },
    {
      "epoch": 0.0794,
      "grad_norm": 0.4497189223766327,
      "learning_rate": 9.702034422997658e-06,
      "loss": 0.372,
      "step": 997
    },
    {
      "epoch": 0.0796,
      "grad_norm": 0.4725744128227234,
      "learning_rate": 9.700846274250252e-06,
      "loss": 0.3255,
      "step": 998
    },
    {
      "epoch": 0.0798,
      "grad_norm": 0.5338658690452576,
      "learning_rate": 9.699655834367479e-06,
      "loss": 0.3626,
      "step": 999
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.4971168637275696,
      "learning_rate": 9.698463103929542e-06,
      "loss": 0.3351,
      "step": 1000
    },
    {
      "epoch": 0.0802,
      "grad_norm": 0.42593902349472046,
      "learning_rate": 9.697268083517767e-06,
      "loss": 0.3479,
      "step": 1001
    },
    {
      "epoch": 0.0804,
      "grad_norm": 0.48535603284835815,
      "learning_rate": 9.696070773714592e-06,
      "loss": 0.3457,
      "step": 1002
    },
    {
      "epoch": 0.0806,
      "grad_norm": 0.5903659462928772,
      "learning_rate": 9.69487117510357e-06,
      "loss": 0.3709,
      "step": 1003
    },
    {
      "epoch": 0.0808,
      "grad_norm": 0.45080164074897766,
      "learning_rate": 9.693669288269371e-06,
      "loss": 0.333,
      "step": 1004
    },
    {
      "epoch": 0.081,
      "grad_norm": 0.7654392123222351,
      "learning_rate": 9.69246511379778e-06,
      "loss": 0.3345,
      "step": 1005
    },
    {
      "epoch": 0.0812,
      "grad_norm": 0.715186357498169,
      "learning_rate": 9.691258652275698e-06,
      "loss": 0.3412,
      "step": 1006
    },
    {
      "epoch": 0.0814,
      "grad_norm": 0.8522730469703674,
      "learning_rate": 9.690049904291139e-06,
      "loss": 0.3673,
      "step": 1007
    },
    {
      "epoch": 0.0816,
      "grad_norm": 0.518058180809021,
      "learning_rate": 9.68883887043323e-06,
      "loss": 0.3676,
      "step": 1008
    },
    {
      "epoch": 0.0818,
      "grad_norm": 0.656116783618927,
      "learning_rate": 9.687625551292219e-06,
      "loss": 0.3335,
      "step": 1009
    },
    {
      "epoch": 0.082,
      "grad_norm": 0.6337675452232361,
      "learning_rate": 9.68640994745946e-06,
      "loss": 0.3492,
      "step": 1010
    },
    {
      "epoch": 0.0822,
      "grad_norm": 0.516303539276123,
      "learning_rate": 9.68519205952742e-06,
      "loss": 0.3476,
      "step": 1011
    },
    {
      "epoch": 0.0824,
      "grad_norm": 0.4676983058452606,
      "learning_rate": 9.68397188808969e-06,
      "loss": 0.3379,
      "step": 1012
    },
    {
      "epoch": 0.0826,
      "grad_norm": 0.518715500831604,
      "learning_rate": 9.682749433740963e-06,
      "loss": 0.3377,
      "step": 1013
    },
    {
      "epoch": 0.0828,
      "grad_norm": 0.5915587544441223,
      "learning_rate": 9.681524697077047e-06,
      "loss": 0.3421,
      "step": 1014
    },
    {
      "epoch": 0.083,
      "grad_norm": 0.6257730722427368,
      "learning_rate": 9.680297678694867e-06,
      "loss": 0.3954,
      "step": 1015
    },
    {
      "epoch": 0.0832,
      "grad_norm": 0.9275712966918945,
      "learning_rate": 9.679068379192455e-06,
      "loss": 0.356,
      "step": 1016
    },
    {
      "epoch": 0.0834,
      "grad_norm": 0.5491714477539062,
      "learning_rate": 9.677836799168958e-06,
      "loss": 0.3249,
      "step": 1017
    },
    {
      "epoch": 0.0836,
      "grad_norm": 0.5032721161842346,
      "learning_rate": 9.67660293922463e-06,
      "loss": 0.362,
      "step": 1018
    },
    {
      "epoch": 0.0838,
      "grad_norm": 0.5065836906433105,
      "learning_rate": 9.675366799960842e-06,
      "loss": 0.3464,
      "step": 1019
    },
    {
      "epoch": 0.084,
      "grad_norm": 0.45900025963783264,
      "learning_rate": 9.674128381980073e-06,
      "loss": 0.374,
      "step": 1020
    },
    {
      "epoch": 0.0842,
      "grad_norm": 0.5183061957359314,
      "learning_rate": 9.672887685885913e-06,
      "loss": 0.3594,
      "step": 1021
    },
    {
      "epoch": 0.0844,
      "grad_norm": 0.5346204042434692,
      "learning_rate": 9.671644712283061e-06,
      "loss": 0.3571,
      "step": 1022
    },
    {
      "epoch": 0.0846,
      "grad_norm": 0.5771331787109375,
      "learning_rate": 9.670399461777328e-06,
      "loss": 0.345,
      "step": 1023
    },
    {
      "epoch": 0.0848,
      "grad_norm": 0.521988034248352,
      "learning_rate": 9.669151934975635e-06,
      "loss": 0.352,
      "step": 1024
    },
    {
      "epoch": 0.085,
      "grad_norm": 0.5415619611740112,
      "learning_rate": 9.667902132486009e-06,
      "loss": 0.357,
      "step": 1025
    },
    {
      "epoch": 0.0852,
      "grad_norm": 0.48480916023254395,
      "learning_rate": 9.666650054917591e-06,
      "loss": 0.3339,
      "step": 1026
    },
    {
      "epoch": 0.0854,
      "grad_norm": 0.49510812759399414,
      "learning_rate": 9.665395702880627e-06,
      "loss": 0.3152,
      "step": 1027
    },
    {
      "epoch": 0.0856,
      "grad_norm": 0.49149850010871887,
      "learning_rate": 9.664139076986473e-06,
      "loss": 0.3548,
      "step": 1028
    },
    {
      "epoch": 0.0858,
      "grad_norm": 0.5364273190498352,
      "learning_rate": 9.662880177847595e-06,
      "loss": 0.3565,
      "step": 1029
    },
    {
      "epoch": 0.086,
      "grad_norm": 0.5022346377372742,
      "learning_rate": 9.661619006077562e-06,
      "loss": 0.3143,
      "step": 1030
    },
    {
      "epoch": 0.0862,
      "grad_norm": 0.5110144019126892,
      "learning_rate": 9.660355562291055e-06,
      "loss": 0.3709,
      "step": 1031
    },
    {
      "epoch": 0.0864,
      "grad_norm": 1.154586911201477,
      "learning_rate": 9.659089847103863e-06,
      "loss": 0.3452,
      "step": 1032
    },
    {
      "epoch": 0.0866,
      "grad_norm": 0.5654928684234619,
      "learning_rate": 9.65782186113288e-06,
      "loss": 0.3333,
      "step": 1033
    },
    {
      "epoch": 0.0868,
      "grad_norm": 0.5360839366912842,
      "learning_rate": 9.656551604996102e-06,
      "loss": 0.357,
      "step": 1034
    },
    {
      "epoch": 0.087,
      "grad_norm": 0.5790228247642517,
      "learning_rate": 9.655279079312643e-06,
      "loss": 0.3693,
      "step": 1035
    },
    {
      "epoch": 0.0872,
      "grad_norm": 0.6016777157783508,
      "learning_rate": 9.654004284702712e-06,
      "loss": 0.3803,
      "step": 1036
    },
    {
      "epoch": 0.0874,
      "grad_norm": 0.5772127509117126,
      "learning_rate": 9.65272722178763e-06,
      "loss": 0.3705,
      "step": 1037
    },
    {
      "epoch": 0.0876,
      "grad_norm": 0.8341662287712097,
      "learning_rate": 9.651447891189824e-06,
      "loss": 0.368,
      "step": 1038
    },
    {
      "epoch": 0.0878,
      "grad_norm": 0.6734280586242676,
      "learning_rate": 9.650166293532822e-06,
      "loss": 0.3745,
      "step": 1039
    },
    {
      "epoch": 0.088,
      "grad_norm": 0.5251243710517883,
      "learning_rate": 9.648882429441258e-06,
      "loss": 0.3553,
      "step": 1040
    },
    {
      "epoch": 0.0882,
      "grad_norm": 0.53561931848526,
      "learning_rate": 9.647596299540874e-06,
      "loss": 0.3576,
      "step": 1041
    },
    {
      "epoch": 0.0884,
      "grad_norm": 0.530691385269165,
      "learning_rate": 9.646307904458513e-06,
      "loss": 0.336,
      "step": 1042
    },
    {
      "epoch": 0.0886,
      "grad_norm": 0.5595331788063049,
      "learning_rate": 9.645017244822124e-06,
      "loss": 0.3442,
      "step": 1043
    },
    {
      "epoch": 0.0888,
      "grad_norm": 0.6206316351890564,
      "learning_rate": 9.643724321260757e-06,
      "loss": 0.3836,
      "step": 1044
    },
    {
      "epoch": 0.089,
      "grad_norm": 0.5363098978996277,
      "learning_rate": 9.642429134404568e-06,
      "loss": 0.3796,
      "step": 1045
    },
    {
      "epoch": 0.0892,
      "grad_norm": 0.43067196011543274,
      "learning_rate": 9.641131684884817e-06,
      "loss": 0.3012,
      "step": 1046
    },
    {
      "epoch": 0.0894,
      "grad_norm": 0.42950958013534546,
      "learning_rate": 9.639831973333864e-06,
      "loss": 0.3555,
      "step": 1047
    },
    {
      "epoch": 0.0896,
      "grad_norm": 0.6277639269828796,
      "learning_rate": 9.638530000385171e-06,
      "loss": 0.3653,
      "step": 1048
    },
    {
      "epoch": 0.0898,
      "grad_norm": 0.4326036870479584,
      "learning_rate": 9.637225766673309e-06,
      "loss": 0.3399,
      "step": 1049
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.4661896526813507,
      "learning_rate": 9.635919272833938e-06,
      "loss": 0.3128,
      "step": 1050
    },
    {
      "epoch": 0.0902,
      "grad_norm": 0.46891459822654724,
      "learning_rate": 9.634610519503833e-06,
      "loss": 0.359,
      "step": 1051
    },
    {
      "epoch": 0.0904,
      "grad_norm": 0.8917360305786133,
      "learning_rate": 9.633299507320862e-06,
      "loss": 0.3684,
      "step": 1052
    },
    {
      "epoch": 0.0906,
      "grad_norm": 0.4923088550567627,
      "learning_rate": 9.631986236923998e-06,
      "loss": 0.3485,
      "step": 1053
    },
    {
      "epoch": 0.0908,
      "grad_norm": 0.43446192145347595,
      "learning_rate": 9.630670708953311e-06,
      "loss": 0.336,
      "step": 1054
    },
    {
      "epoch": 0.091,
      "grad_norm": 0.5108198523521423,
      "learning_rate": 9.629352924049975e-06,
      "loss": 0.3986,
      "step": 1055
    },
    {
      "epoch": 0.0912,
      "grad_norm": 0.5163364410400391,
      "learning_rate": 9.628032882856262e-06,
      "loss": 0.3466,
      "step": 1056
    },
    {
      "epoch": 0.0914,
      "grad_norm": 0.6018378138542175,
      "learning_rate": 9.626710586015543e-06,
      "loss": 0.3786,
      "step": 1057
    },
    {
      "epoch": 0.0916,
      "grad_norm": 0.4103482663631439,
      "learning_rate": 9.62538603417229e-06,
      "loss": 0.3291,
      "step": 1058
    },
    {
      "epoch": 0.0918,
      "grad_norm": 0.6276087760925293,
      "learning_rate": 9.624059227972077e-06,
      "loss": 0.3549,
      "step": 1059
    },
    {
      "epoch": 0.092,
      "grad_norm": 0.4890395700931549,
      "learning_rate": 9.622730168061568e-06,
      "loss": 0.357,
      "step": 1060
    },
    {
      "epoch": 0.0922,
      "grad_norm": 0.5783658027648926,
      "learning_rate": 9.62139885508853e-06,
      "loss": 0.3577,
      "step": 1061
    },
    {
      "epoch": 0.0924,
      "grad_norm": 0.5455384254455566,
      "learning_rate": 9.620065289701835e-06,
      "loss": 0.3814,
      "step": 1062
    },
    {
      "epoch": 0.0926,
      "grad_norm": 0.5742502808570862,
      "learning_rate": 9.61872947255144e-06,
      "loss": 0.3351,
      "step": 1063
    },
    {
      "epoch": 0.0928,
      "grad_norm": 0.5078641176223755,
      "learning_rate": 9.617391404288412e-06,
      "loss": 0.3363,
      "step": 1064
    },
    {
      "epoch": 0.093,
      "grad_norm": 0.5563531517982483,
      "learning_rate": 9.616051085564905e-06,
      "loss": 0.3577,
      "step": 1065
    },
    {
      "epoch": 0.0932,
      "grad_norm": 0.49870017170906067,
      "learning_rate": 9.614708517034176e-06,
      "loss": 0.3663,
      "step": 1066
    },
    {
      "epoch": 0.0934,
      "grad_norm": 0.4323883056640625,
      "learning_rate": 9.613363699350575e-06,
      "loss": 0.3527,
      "step": 1067
    },
    {
      "epoch": 0.0936,
      "grad_norm": 0.5061573386192322,
      "learning_rate": 9.612016633169552e-06,
      "loss": 0.3309,
      "step": 1068
    },
    {
      "epoch": 0.0938,
      "grad_norm": 0.4836699068546295,
      "learning_rate": 9.610667319147648e-06,
      "loss": 0.3441,
      "step": 1069
    },
    {
      "epoch": 0.094,
      "grad_norm": 0.44975370168685913,
      "learning_rate": 9.609315757942504e-06,
      "loss": 0.3443,
      "step": 1070
    },
    {
      "epoch": 0.0942,
      "grad_norm": 0.7969689965248108,
      "learning_rate": 9.607961950212855e-06,
      "loss": 0.3296,
      "step": 1071
    },
    {
      "epoch": 0.0944,
      "grad_norm": 0.47381532192230225,
      "learning_rate": 9.606605896618528e-06,
      "loss": 0.3642,
      "step": 1072
    },
    {
      "epoch": 0.0946,
      "grad_norm": 0.5705201625823975,
      "learning_rate": 9.605247597820448e-06,
      "loss": 0.3783,
      "step": 1073
    },
    {
      "epoch": 0.0948,
      "grad_norm": 0.4565364718437195,
      "learning_rate": 9.603887054480636e-06,
      "loss": 0.3319,
      "step": 1074
    },
    {
      "epoch": 0.095,
      "grad_norm": 0.5087733864784241,
      "learning_rate": 9.602524267262202e-06,
      "loss": 0.3809,
      "step": 1075
    },
    {
      "epoch": 0.0952,
      "grad_norm": 0.5657903552055359,
      "learning_rate": 9.601159236829353e-06,
      "loss": 0.4067,
      "step": 1076
    },
    {
      "epoch": 0.0954,
      "grad_norm": 0.5435850024223328,
      "learning_rate": 9.599791963847388e-06,
      "loss": 0.3894,
      "step": 1077
    },
    {
      "epoch": 0.0956,
      "grad_norm": 0.5018324255943298,
      "learning_rate": 9.598422448982697e-06,
      "loss": 0.3655,
      "step": 1078
    },
    {
      "epoch": 0.0958,
      "grad_norm": 0.44521066546440125,
      "learning_rate": 9.597050692902765e-06,
      "loss": 0.3571,
      "step": 1079
    },
    {
      "epoch": 0.096,
      "grad_norm": 0.5333754420280457,
      "learning_rate": 9.595676696276173e-06,
      "loss": 0.32,
      "step": 1080
    },
    {
      "epoch": 0.0962,
      "grad_norm": 0.5294404029846191,
      "learning_rate": 9.594300459772588e-06,
      "loss": 0.343,
      "step": 1081
    },
    {
      "epoch": 0.0964,
      "grad_norm": 0.4388384222984314,
      "learning_rate": 9.592921984062771e-06,
      "loss": 0.3386,
      "step": 1082
    },
    {
      "epoch": 0.0966,
      "grad_norm": 0.4390753507614136,
      "learning_rate": 9.591541269818574e-06,
      "loss": 0.358,
      "step": 1083
    },
    {
      "epoch": 0.0968,
      "grad_norm": 0.5770740509033203,
      "learning_rate": 9.590158317712941e-06,
      "loss": 0.3642,
      "step": 1084
    },
    {
      "epoch": 0.097,
      "grad_norm": 1.5599743127822876,
      "learning_rate": 9.588773128419907e-06,
      "loss": 0.352,
      "step": 1085
    },
    {
      "epoch": 0.0972,
      "grad_norm": 0.5236835479736328,
      "learning_rate": 9.587385702614593e-06,
      "loss": 0.3495,
      "step": 1086
    },
    {
      "epoch": 0.0974,
      "grad_norm": 0.47789889574050903,
      "learning_rate": 9.585996040973218e-06,
      "loss": 0.3942,
      "step": 1087
    },
    {
      "epoch": 0.0976,
      "grad_norm": 0.4701448678970337,
      "learning_rate": 9.584604144173084e-06,
      "loss": 0.3681,
      "step": 1088
    },
    {
      "epoch": 0.0978,
      "grad_norm": 0.4668615460395813,
      "learning_rate": 9.583210012892582e-06,
      "loss": 0.3367,
      "step": 1089
    },
    {
      "epoch": 0.098,
      "grad_norm": 0.4576357901096344,
      "learning_rate": 9.581813647811199e-06,
      "loss": 0.3461,
      "step": 1090
    },
    {
      "epoch": 0.0982,
      "grad_norm": 0.5270260572433472,
      "learning_rate": 9.580415049609503e-06,
      "loss": 0.3479,
      "step": 1091
    },
    {
      "epoch": 0.0984,
      "grad_norm": 0.40753114223480225,
      "learning_rate": 9.579014218969158e-06,
      "loss": 0.3586,
      "step": 1092
    },
    {
      "epoch": 0.0986,
      "grad_norm": 0.4803791046142578,
      "learning_rate": 9.577611156572908e-06,
      "loss": 0.3496,
      "step": 1093
    },
    {
      "epoch": 0.0988,
      "grad_norm": 0.5986677408218384,
      "learning_rate": 9.576205863104588e-06,
      "loss": 0.3659,
      "step": 1094
    },
    {
      "epoch": 0.099,
      "grad_norm": 0.5206531286239624,
      "learning_rate": 9.574798339249124e-06,
      "loss": 0.3464,
      "step": 1095
    },
    {
      "epoch": 0.0992,
      "grad_norm": 0.5747353434562683,
      "learning_rate": 9.573388585692525e-06,
      "loss": 0.3803,
      "step": 1096
    },
    {
      "epoch": 0.0994,
      "grad_norm": 0.45594194531440735,
      "learning_rate": 9.571976603121889e-06,
      "loss": 0.3592,
      "step": 1097
    },
    {
      "epoch": 0.0996,
      "grad_norm": 0.5241343975067139,
      "learning_rate": 9.570562392225395e-06,
      "loss": 0.3589,
      "step": 1098
    },
    {
      "epoch": 0.0998,
      "grad_norm": 0.5716864466667175,
      "learning_rate": 9.569145953692316e-06,
      "loss": 0.3701,
      "step": 1099
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6236914396286011,
      "learning_rate": 9.567727288213005e-06,
      "loss": 0.3748,
      "step": 1100
    },
    {
      "epoch": 0.1002,
      "grad_norm": 0.49273043870925903,
      "learning_rate": 9.566306396478904e-06,
      "loss": 0.3354,
      "step": 1101
    },
    {
      "epoch": 0.1004,
      "grad_norm": 0.4986874461174011,
      "learning_rate": 9.564883279182538e-06,
      "loss": 0.3174,
      "step": 1102
    },
    {
      "epoch": 0.1006,
      "grad_norm": 0.6297382712364197,
      "learning_rate": 9.563457937017514e-06,
      "loss": 0.3617,
      "step": 1103
    },
    {
      "epoch": 0.1008,
      "grad_norm": 0.48524534702301025,
      "learning_rate": 9.562030370678533e-06,
      "loss": 0.3867,
      "step": 1104
    },
    {
      "epoch": 0.101,
      "grad_norm": 0.4474465548992157,
      "learning_rate": 9.560600580861366e-06,
      "loss": 0.3249,
      "step": 1105
    },
    {
      "epoch": 0.1012,
      "grad_norm": 0.572662353515625,
      "learning_rate": 9.55916856826288e-06,
      "loss": 0.339,
      "step": 1106
    },
    {
      "epoch": 0.1014,
      "grad_norm": 1.0939364433288574,
      "learning_rate": 9.557734333581019e-06,
      "loss": 0.3924,
      "step": 1107
    },
    {
      "epoch": 0.1016,
      "grad_norm": 0.5802388787269592,
      "learning_rate": 9.556297877514812e-06,
      "loss": 0.3116,
      "step": 1108
    },
    {
      "epoch": 0.1018,
      "grad_norm": 0.7193163633346558,
      "learning_rate": 9.554859200764371e-06,
      "loss": 0.3373,
      "step": 1109
    },
    {
      "epoch": 0.102,
      "grad_norm": 1.1138827800750732,
      "learning_rate": 9.553418304030886e-06,
      "loss": 0.3783,
      "step": 1110
    },
    {
      "epoch": 0.1022,
      "grad_norm": 0.602170467376709,
      "learning_rate": 9.551975188016638e-06,
      "loss": 0.3333,
      "step": 1111
    },
    {
      "epoch": 0.1024,
      "grad_norm": 0.5407208204269409,
      "learning_rate": 9.550529853424979e-06,
      "loss": 0.372,
      "step": 1112
    },
    {
      "epoch": 0.1026,
      "grad_norm": 0.502263069152832,
      "learning_rate": 9.549082300960351e-06,
      "loss": 0.3768,
      "step": 1113
    },
    {
      "epoch": 0.1028,
      "grad_norm": 0.45668458938598633,
      "learning_rate": 9.547632531328273e-06,
      "loss": 0.352,
      "step": 1114
    },
    {
      "epoch": 0.103,
      "grad_norm": 0.6921423077583313,
      "learning_rate": 9.546180545235344e-06,
      "loss": 0.3407,
      "step": 1115
    },
    {
      "epoch": 0.1032,
      "grad_norm": 0.5054295659065247,
      "learning_rate": 9.544726343389245e-06,
      "loss": 0.3842,
      "step": 1116
    },
    {
      "epoch": 0.1034,
      "grad_norm": 0.479579359292984,
      "learning_rate": 9.543269926498735e-06,
      "loss": 0.3581,
      "step": 1117
    },
    {
      "epoch": 0.1036,
      "grad_norm": 0.48015955090522766,
      "learning_rate": 9.541811295273657e-06,
      "loss": 0.3573,
      "step": 1118
    },
    {
      "epoch": 0.1038,
      "grad_norm": 0.7124462127685547,
      "learning_rate": 9.540350450424927e-06,
      "loss": 0.3476,
      "step": 1119
    },
    {
      "epoch": 0.104,
      "grad_norm": 0.5936033725738525,
      "learning_rate": 9.538887392664544e-06,
      "loss": 0.3658,
      "step": 1120
    },
    {
      "epoch": 0.1042,
      "grad_norm": 0.49928274750709534,
      "learning_rate": 9.537422122705585e-06,
      "loss": 0.3691,
      "step": 1121
    },
    {
      "epoch": 0.1044,
      "grad_norm": 0.49258556962013245,
      "learning_rate": 9.535954641262206e-06,
      "loss": 0.3205,
      "step": 1122
    },
    {
      "epoch": 0.1046,
      "grad_norm": 0.46767520904541016,
      "learning_rate": 9.534484949049636e-06,
      "loss": 0.3557,
      "step": 1123
    },
    {
      "epoch": 0.1048,
      "grad_norm": 0.5055413246154785,
      "learning_rate": 9.53301304678419e-06,
      "loss": 0.3639,
      "step": 1124
    },
    {
      "epoch": 0.105,
      "grad_norm": 0.4890922009944916,
      "learning_rate": 9.531538935183252e-06,
      "loss": 0.3815,
      "step": 1125
    },
    {
      "epoch": 0.1052,
      "grad_norm": 0.6504900455474854,
      "learning_rate": 9.530062614965286e-06,
      "loss": 0.3873,
      "step": 1126
    },
    {
      "epoch": 0.1054,
      "grad_norm": 0.4739128351211548,
      "learning_rate": 9.528584086849832e-06,
      "loss": 0.378,
      "step": 1127
    },
    {
      "epoch": 0.1056,
      "grad_norm": 0.8000157475471497,
      "learning_rate": 9.52710335155751e-06,
      "loss": 0.3578,
      "step": 1128
    },
    {
      "epoch": 0.1058,
      "grad_norm": 0.5482091903686523,
      "learning_rate": 9.525620409810009e-06,
      "loss": 0.3506,
      "step": 1129
    },
    {
      "epoch": 0.106,
      "grad_norm": 0.4824024438858032,
      "learning_rate": 9.524135262330098e-06,
      "loss": 0.3268,
      "step": 1130
    },
    {
      "epoch": 0.1062,
      "grad_norm": 0.5250397324562073,
      "learning_rate": 9.52264790984162e-06,
      "loss": 0.3593,
      "step": 1131
    },
    {
      "epoch": 0.1064,
      "grad_norm": 0.49065932631492615,
      "learning_rate": 9.521158353069494e-06,
      "loss": 0.3279,
      "step": 1132
    },
    {
      "epoch": 0.1066,
      "grad_norm": 0.5136129260063171,
      "learning_rate": 9.51966659273971e-06,
      "loss": 0.3259,
      "step": 1133
    },
    {
      "epoch": 0.1068,
      "grad_norm": 0.6091078519821167,
      "learning_rate": 9.518172629579334e-06,
      "loss": 0.3531,
      "step": 1134
    },
    {
      "epoch": 0.107,
      "grad_norm": 0.5273619294166565,
      "learning_rate": 9.516676464316505e-06,
      "loss": 0.36,
      "step": 1135
    },
    {
      "epoch": 0.1072,
      "grad_norm": 0.5556766986846924,
      "learning_rate": 9.515178097680437e-06,
      "loss": 0.3531,
      "step": 1136
    },
    {
      "epoch": 0.1074,
      "grad_norm": 0.5435210466384888,
      "learning_rate": 9.513677530401415e-06,
      "loss": 0.3436,
      "step": 1137
    },
    {
      "epoch": 0.1076,
      "grad_norm": 0.8341330289840698,
      "learning_rate": 9.512174763210798e-06,
      "loss": 0.3455,
      "step": 1138
    },
    {
      "epoch": 0.1078,
      "grad_norm": 0.7881489396095276,
      "learning_rate": 9.510669796841014e-06,
      "loss": 0.3397,
      "step": 1139
    },
    {
      "epoch": 0.108,
      "grad_norm": 0.458372563123703,
      "learning_rate": 9.50916263202557e-06,
      "loss": 0.3256,
      "step": 1140
    },
    {
      "epoch": 0.1082,
      "grad_norm": 0.6706159710884094,
      "learning_rate": 9.507653269499035e-06,
      "loss": 0.3751,
      "step": 1141
    },
    {
      "epoch": 0.1084,
      "grad_norm": 0.5209953784942627,
      "learning_rate": 9.506141709997058e-06,
      "loss": 0.3391,
      "step": 1142
    },
    {
      "epoch": 0.1086,
      "grad_norm": 0.5265771746635437,
      "learning_rate": 9.504627954256352e-06,
      "loss": 0.3493,
      "step": 1143
    },
    {
      "epoch": 0.1088,
      "grad_norm": 0.5406778454780579,
      "learning_rate": 9.503112003014702e-06,
      "loss": 0.3683,
      "step": 1144
    },
    {
      "epoch": 0.109,
      "grad_norm": 0.5090137124061584,
      "learning_rate": 9.501593857010968e-06,
      "loss": 0.3604,
      "step": 1145
    },
    {
      "epoch": 0.1092,
      "grad_norm": 0.526154100894928,
      "learning_rate": 9.500073516985074e-06,
      "loss": 0.3711,
      "step": 1146
    },
    {
      "epoch": 0.1094,
      "grad_norm": 0.4734779894351959,
      "learning_rate": 9.498550983678016e-06,
      "loss": 0.3355,
      "step": 1147
    },
    {
      "epoch": 0.1096,
      "grad_norm": 0.49225884675979614,
      "learning_rate": 9.497026257831856e-06,
      "loss": 0.3376,
      "step": 1148
    },
    {
      "epoch": 0.1098,
      "grad_norm": 0.5177637934684753,
      "learning_rate": 9.495499340189729e-06,
      "loss": 0.3522,
      "step": 1149
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.49691468477249146,
      "learning_rate": 9.493970231495836e-06,
      "loss": 0.3707,
      "step": 1150
    },
    {
      "epoch": 0.1102,
      "grad_norm": 0.6227887868881226,
      "learning_rate": 9.492438932495444e-06,
      "loss": 0.3824,
      "step": 1151
    },
    {
      "epoch": 0.1104,
      "grad_norm": 0.583932101726532,
      "learning_rate": 9.490905443934892e-06,
      "loss": 0.3844,
      "step": 1152
    },
    {
      "epoch": 0.1106,
      "grad_norm": 0.4757688045501709,
      "learning_rate": 9.489369766561584e-06,
      "loss": 0.3719,
      "step": 1153
    },
    {
      "epoch": 0.1108,
      "grad_norm": 0.6931418776512146,
      "learning_rate": 9.487831901123989e-06,
      "loss": 0.341,
      "step": 1154
    },
    {
      "epoch": 0.111,
      "grad_norm": 0.35830995440483093,
      "learning_rate": 9.486291848371642e-06,
      "loss": 0.3128,
      "step": 1155
    },
    {
      "epoch": 0.1112,
      "grad_norm": 0.5050476789474487,
      "learning_rate": 9.484749609055151e-06,
      "loss": 0.3643,
      "step": 1156
    },
    {
      "epoch": 0.1114,
      "grad_norm": 0.4266575872898102,
      "learning_rate": 9.48320518392618e-06,
      "loss": 0.3403,
      "step": 1157
    },
    {
      "epoch": 0.1116,
      "grad_norm": 0.5869317054748535,
      "learning_rate": 9.481658573737465e-06,
      "loss": 0.3901,
      "step": 1158
    },
    {
      "epoch": 0.1118,
      "grad_norm": 0.4829937815666199,
      "learning_rate": 9.480109779242805e-06,
      "loss": 0.3673,
      "step": 1159
    },
    {
      "epoch": 0.112,
      "grad_norm": 0.48449984192848206,
      "learning_rate": 9.478558801197065e-06,
      "loss": 0.3321,
      "step": 1160
    },
    {
      "epoch": 0.1122,
      "grad_norm": 0.5225324630737305,
      "learning_rate": 9.47700564035617e-06,
      "loss": 0.3555,
      "step": 1161
    },
    {
      "epoch": 0.1124,
      "grad_norm": 0.4384607970714569,
      "learning_rate": 9.475450297477113e-06,
      "loss": 0.3403,
      "step": 1162
    },
    {
      "epoch": 0.1126,
      "grad_norm": 0.50686115026474,
      "learning_rate": 9.473892773317952e-06,
      "loss": 0.3606,
      "step": 1163
    },
    {
      "epoch": 0.1128,
      "grad_norm": 0.4873618483543396,
      "learning_rate": 9.4723330686378e-06,
      "loss": 0.3636,
      "step": 1164
    },
    {
      "epoch": 0.113,
      "grad_norm": 0.41635802388191223,
      "learning_rate": 9.470771184196842e-06,
      "loss": 0.3362,
      "step": 1165
    },
    {
      "epoch": 0.1132,
      "grad_norm": 0.6017654538154602,
      "learning_rate": 9.46920712075632e-06,
      "loss": 0.3876,
      "step": 1166
    },
    {
      "epoch": 0.1134,
      "grad_norm": 0.6001546382904053,
      "learning_rate": 9.46764087907854e-06,
      "loss": 0.3312,
      "step": 1167
    },
    {
      "epoch": 0.1136,
      "grad_norm": 0.5046603679656982,
      "learning_rate": 9.46607245992687e-06,
      "loss": 0.3486,
      "step": 1168
    },
    {
      "epoch": 0.1138,
      "grad_norm": 0.5154598951339722,
      "learning_rate": 9.464501864065735e-06,
      "loss": 0.3865,
      "step": 1169
    },
    {
      "epoch": 0.114,
      "grad_norm": 0.5171992182731628,
      "learning_rate": 9.46292909226063e-06,
      "loss": 0.3332,
      "step": 1170
    },
    {
      "epoch": 0.1142,
      "grad_norm": 0.5726218819618225,
      "learning_rate": 9.461354145278098e-06,
      "loss": 0.34,
      "step": 1171
    },
    {
      "epoch": 0.1144,
      "grad_norm": 0.5547689199447632,
      "learning_rate": 9.459777023885754e-06,
      "loss": 0.3902,
      "step": 1172
    },
    {
      "epoch": 0.1146,
      "grad_norm": 0.6879380941390991,
      "learning_rate": 9.458197728852268e-06,
      "loss": 0.3537,
      "step": 1173
    },
    {
      "epoch": 0.1148,
      "grad_norm": 0.4733753502368927,
      "learning_rate": 9.456616260947367e-06,
      "loss": 0.3799,
      "step": 1174
    },
    {
      "epoch": 0.115,
      "grad_norm": 0.43653035163879395,
      "learning_rate": 9.45503262094184e-06,
      "loss": 0.3272,
      "step": 1175
    },
    {
      "epoch": 0.1152,
      "grad_norm": 0.5041536092758179,
      "learning_rate": 9.453446809607534e-06,
      "loss": 0.3629,
      "step": 1176
    },
    {
      "epoch": 0.1154,
      "grad_norm": 0.5370034575462341,
      "learning_rate": 9.451858827717354e-06,
      "loss": 0.3688,
      "step": 1177
    },
    {
      "epoch": 0.1156,
      "grad_norm": 0.45768848061561584,
      "learning_rate": 9.450268676045261e-06,
      "loss": 0.3477,
      "step": 1178
    },
    {
      "epoch": 0.1158,
      "grad_norm": 0.5216701626777649,
      "learning_rate": 9.448676355366282e-06,
      "loss": 0.3563,
      "step": 1179
    },
    {
      "epoch": 0.116,
      "grad_norm": 0.5274759531021118,
      "learning_rate": 9.44708186645649e-06,
      "loss": 0.3296,
      "step": 1180
    },
    {
      "epoch": 0.1162,
      "grad_norm": 0.6502773761749268,
      "learning_rate": 9.445485210093018e-06,
      "loss": 0.3621,
      "step": 1181
    },
    {
      "epoch": 0.1164,
      "grad_norm": 0.5024909973144531,
      "learning_rate": 9.443886387054058e-06,
      "loss": 0.364,
      "step": 1182
    },
    {
      "epoch": 0.1166,
      "grad_norm": 0.40306851267814636,
      "learning_rate": 9.44228539811886e-06,
      "loss": 0.3589,
      "step": 1183
    },
    {
      "epoch": 0.1168,
      "grad_norm": 0.9481571912765503,
      "learning_rate": 9.440682244067724e-06,
      "loss": 0.356,
      "step": 1184
    },
    {
      "epoch": 0.117,
      "grad_norm": 0.5808488726615906,
      "learning_rate": 9.439076925682006e-06,
      "loss": 0.4013,
      "step": 1185
    },
    {
      "epoch": 0.1172,
      "grad_norm": 0.5002434253692627,
      "learning_rate": 9.437469443744124e-06,
      "loss": 0.3856,
      "step": 1186
    },
    {
      "epoch": 0.1174,
      "grad_norm": 0.454313188791275,
      "learning_rate": 9.435859799037541e-06,
      "loss": 0.3429,
      "step": 1187
    },
    {
      "epoch": 0.1176,
      "grad_norm": 0.4779631495475769,
      "learning_rate": 9.43424799234678e-06,
      "loss": 0.3446,
      "step": 1188
    },
    {
      "epoch": 0.1178,
      "grad_norm": 0.5233564376831055,
      "learning_rate": 9.432634024457414e-06,
      "loss": 0.3365,
      "step": 1189
    },
    {
      "epoch": 0.118,
      "grad_norm": 0.49087968468666077,
      "learning_rate": 9.431017896156074e-06,
      "loss": 0.358,
      "step": 1190
    },
    {
      "epoch": 0.1182,
      "grad_norm": 0.5212580561637878,
      "learning_rate": 9.429399608230441e-06,
      "loss": 0.3596,
      "step": 1191
    },
    {
      "epoch": 0.1184,
      "grad_norm": 0.45012229681015015,
      "learning_rate": 9.427779161469246e-06,
      "loss": 0.3803,
      "step": 1192
    },
    {
      "epoch": 0.1186,
      "grad_norm": 0.7486437559127808,
      "learning_rate": 9.426156556662276e-06,
      "loss": 0.3804,
      "step": 1193
    },
    {
      "epoch": 0.1188,
      "grad_norm": 0.4697062075138092,
      "learning_rate": 9.424531794600372e-06,
      "loss": 0.3476,
      "step": 1194
    },
    {
      "epoch": 0.119,
      "grad_norm": 0.5453007221221924,
      "learning_rate": 9.42290487607542e-06,
      "loss": 0.3613,
      "step": 1195
    },
    {
      "epoch": 0.1192,
      "grad_norm": 0.6058673858642578,
      "learning_rate": 9.421275801880363e-06,
      "loss": 0.3324,
      "step": 1196
    },
    {
      "epoch": 0.1194,
      "grad_norm": 0.5053179860115051,
      "learning_rate": 9.419644572809189e-06,
      "loss": 0.3457,
      "step": 1197
    },
    {
      "epoch": 0.1196,
      "grad_norm": 0.49429404735565186,
      "learning_rate": 9.418011189656942e-06,
      "loss": 0.337,
      "step": 1198
    },
    {
      "epoch": 0.1198,
      "grad_norm": 0.48942291736602783,
      "learning_rate": 9.41637565321971e-06,
      "loss": 0.3294,
      "step": 1199
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5437662601470947,
      "learning_rate": 9.414737964294636e-06,
      "loss": 0.3276,
      "step": 1200
    },
    {
      "epoch": 0.1202,
      "grad_norm": 0.5349637269973755,
      "learning_rate": 9.41309812367991e-06,
      "loss": 0.3334,
      "step": 1201
    },
    {
      "epoch": 0.1204,
      "grad_norm": 0.5308757424354553,
      "learning_rate": 9.411456132174768e-06,
      "loss": 0.3607,
      "step": 1202
    },
    {
      "epoch": 0.1206,
      "grad_norm": 0.5233594179153442,
      "learning_rate": 9.409811990579498e-06,
      "loss": 0.3626,
      "step": 1203
    },
    {
      "epoch": 0.1208,
      "grad_norm": 0.7127656936645508,
      "learning_rate": 9.408165699695435e-06,
      "loss": 0.394,
      "step": 1204
    },
    {
      "epoch": 0.121,
      "grad_norm": 0.4882444739341736,
      "learning_rate": 9.406517260324962e-06,
      "loss": 0.3824,
      "step": 1205
    },
    {
      "epoch": 0.1212,
      "grad_norm": 0.4881872236728668,
      "learning_rate": 9.404866673271506e-06,
      "loss": 0.3354,
      "step": 1206
    },
    {
      "epoch": 0.1214,
      "grad_norm": 0.4726344048976898,
      "learning_rate": 9.403213939339546e-06,
      "loss": 0.3788,
      "step": 1207
    },
    {
      "epoch": 0.1216,
      "grad_norm": 0.611290693283081,
      "learning_rate": 9.401559059334601e-06,
      "loss": 0.3481,
      "step": 1208
    },
    {
      "epoch": 0.1218,
      "grad_norm": 0.473889023065567,
      "learning_rate": 9.399902034063244e-06,
      "loss": 0.3459,
      "step": 1209
    },
    {
      "epoch": 0.122,
      "grad_norm": 0.44426506757736206,
      "learning_rate": 9.398242864333084e-06,
      "loss": 0.3207,
      "step": 1210
    },
    {
      "epoch": 0.1222,
      "grad_norm": 0.4222564399242401,
      "learning_rate": 9.396581550952781e-06,
      "loss": 0.3429,
      "step": 1211
    },
    {
      "epoch": 0.1224,
      "grad_norm": 0.5440704226493835,
      "learning_rate": 9.394918094732044e-06,
      "loss": 0.3907,
      "step": 1212
    },
    {
      "epoch": 0.1226,
      "grad_norm": 0.45974430441856384,
      "learning_rate": 9.393252496481615e-06,
      "loss": 0.3467,
      "step": 1213
    },
    {
      "epoch": 0.1228,
      "grad_norm": 0.491125226020813,
      "learning_rate": 9.39158475701329e-06,
      "loss": 0.3677,
      "step": 1214
    },
    {
      "epoch": 0.123,
      "grad_norm": 0.5549747347831726,
      "learning_rate": 9.389914877139903e-06,
      "loss": 0.3642,
      "step": 1215
    },
    {
      "epoch": 0.1232,
      "grad_norm": 0.5737738013267517,
      "learning_rate": 9.388242857675336e-06,
      "loss": 0.3429,
      "step": 1216
    },
    {
      "epoch": 0.1234,
      "grad_norm": 0.5800195336341858,
      "learning_rate": 9.386568699434509e-06,
      "loss": 0.3443,
      "step": 1217
    },
    {
      "epoch": 0.1236,
      "grad_norm": 0.4700586795806885,
      "learning_rate": 9.384892403233384e-06,
      "loss": 0.3498,
      "step": 1218
    },
    {
      "epoch": 0.1238,
      "grad_norm": 0.4195355176925659,
      "learning_rate": 9.383213969888972e-06,
      "loss": 0.3165,
      "step": 1219
    },
    {
      "epoch": 0.124,
      "grad_norm": 0.5736050605773926,
      "learning_rate": 9.381533400219319e-06,
      "loss": 0.3778,
      "step": 1220
    },
    {
      "epoch": 0.1242,
      "grad_norm": 0.6018197536468506,
      "learning_rate": 9.379850695043513e-06,
      "loss": 0.3716,
      "step": 1221
    },
    {
      "epoch": 0.1244,
      "grad_norm": 0.45120471715927124,
      "learning_rate": 9.378165855181687e-06,
      "loss": 0.341,
      "step": 1222
    },
    {
      "epoch": 0.1246,
      "grad_norm": 0.637415885925293,
      "learning_rate": 9.376478881455008e-06,
      "loss": 0.3219,
      "step": 1223
    },
    {
      "epoch": 0.1248,
      "grad_norm": 0.43216440081596375,
      "learning_rate": 9.37478977468569e-06,
      "loss": 0.3315,
      "step": 1224
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.5679078102111816,
      "learning_rate": 9.37309853569698e-06,
      "loss": 0.3948,
      "step": 1225
    },
    {
      "epoch": 0.1252,
      "grad_norm": 0.6967563033103943,
      "learning_rate": 9.371405165313169e-06,
      "loss": 0.3429,
      "step": 1226
    },
    {
      "epoch": 0.1254,
      "grad_norm": 0.5948230028152466,
      "learning_rate": 9.369709664359585e-06,
      "loss": 0.3683,
      "step": 1227
    },
    {
      "epoch": 0.1256,
      "grad_norm": 0.4793342351913452,
      "learning_rate": 9.368012033662594e-06,
      "loss": 0.3248,
      "step": 1228
    },
    {
      "epoch": 0.1258,
      "grad_norm": 0.48579245805740356,
      "learning_rate": 9.366312274049602e-06,
      "loss": 0.3221,
      "step": 1229
    },
    {
      "epoch": 0.126,
      "grad_norm": 0.418612003326416,
      "learning_rate": 9.364610386349048e-06,
      "loss": 0.3719,
      "step": 1230
    },
    {
      "epoch": 0.1262,
      "grad_norm": 0.5713820457458496,
      "learning_rate": 9.362906371390416e-06,
      "loss": 0.3222,
      "step": 1231
    },
    {
      "epoch": 0.1264,
      "grad_norm": 0.558028519153595,
      "learning_rate": 9.361200230004219e-06,
      "loss": 0.3485,
      "step": 1232
    },
    {
      "epoch": 0.1266,
      "grad_norm": 0.4297710955142975,
      "learning_rate": 9.35949196302201e-06,
      "loss": 0.3277,
      "step": 1233
    },
    {
      "epoch": 0.1268,
      "grad_norm": 0.5568429827690125,
      "learning_rate": 9.357781571276379e-06,
      "loss": 0.3617,
      "step": 1234
    },
    {
      "epoch": 0.127,
      "grad_norm": 0.5712378621101379,
      "learning_rate": 9.356069055600949e-06,
      "loss": 0.357,
      "step": 1235
    },
    {
      "epoch": 0.1272,
      "grad_norm": 0.7178053259849548,
      "learning_rate": 9.354354416830377e-06,
      "loss": 0.3371,
      "step": 1236
    },
    {
      "epoch": 0.1274,
      "grad_norm": 0.4613014757633209,
      "learning_rate": 9.352637655800362e-06,
      "loss": 0.3667,
      "step": 1237
    },
    {
      "epoch": 0.1276,
      "grad_norm": 0.6268631219863892,
      "learning_rate": 9.35091877334763e-06,
      "loss": 0.3679,
      "step": 1238
    },
    {
      "epoch": 0.1278,
      "grad_norm": 0.4313611388206482,
      "learning_rate": 9.349197770309942e-06,
      "loss": 0.3411,
      "step": 1239
    },
    {
      "epoch": 0.128,
      "grad_norm": 0.5372244119644165,
      "learning_rate": 9.347474647526095e-06,
      "loss": 0.3649,
      "step": 1240
    },
    {
      "epoch": 0.1282,
      "grad_norm": 0.4558833837509155,
      "learning_rate": 9.34574940583592e-06,
      "loss": 0.3256,
      "step": 1241
    },
    {
      "epoch": 0.1284,
      "grad_norm": 0.501487135887146,
      "learning_rate": 9.344022046080277e-06,
      "loss": 0.3572,
      "step": 1242
    },
    {
      "epoch": 0.1286,
      "grad_norm": 0.5023969411849976,
      "learning_rate": 9.342292569101061e-06,
      "loss": 0.3468,
      "step": 1243
    },
    {
      "epoch": 0.1288,
      "grad_norm": 0.5929833054542542,
      "learning_rate": 9.340560975741198e-06,
      "loss": 0.3904,
      "step": 1244
    },
    {
      "epoch": 0.129,
      "grad_norm": 0.514889657497406,
      "learning_rate": 9.338827266844643e-06,
      "loss": 0.3303,
      "step": 1245
    },
    {
      "epoch": 0.1292,
      "grad_norm": 0.5326282978057861,
      "learning_rate": 9.337091443256388e-06,
      "loss": 0.3442,
      "step": 1246
    },
    {
      "epoch": 0.1294,
      "grad_norm": 0.4738917350769043,
      "learning_rate": 9.33535350582245e-06,
      "loss": 0.3775,
      "step": 1247
    },
    {
      "epoch": 0.1296,
      "grad_norm": 1.141087532043457,
      "learning_rate": 9.333613455389883e-06,
      "loss": 0.3361,
      "step": 1248
    },
    {
      "epoch": 0.1298,
      "grad_norm": 0.6002895832061768,
      "learning_rate": 9.33187129280676e-06,
      "loss": 0.3421,
      "step": 1249
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5179344415664673,
      "learning_rate": 9.330127018922195e-06,
      "loss": 0.3233,
      "step": 1250
    },
    {
      "epoch": 0.1302,
      "grad_norm": 2.2320244312286377,
      "learning_rate": 9.328380634586322e-06,
      "loss": 0.3288,
      "step": 1251
    },
    {
      "epoch": 0.1304,
      "grad_norm": 0.515999436378479,
      "learning_rate": 9.326632140650311e-06,
      "loss": 0.3306,
      "step": 1252
    },
    {
      "epoch": 0.1306,
      "grad_norm": 0.49450457096099854,
      "learning_rate": 9.324881537966355e-06,
      "loss": 0.3815,
      "step": 1253
    },
    {
      "epoch": 0.1308,
      "grad_norm": 0.5525022149085999,
      "learning_rate": 9.323128827387675e-06,
      "loss": 0.3268,
      "step": 1254
    },
    {
      "epoch": 0.131,
      "grad_norm": 0.7800925970077515,
      "learning_rate": 9.321374009768525e-06,
      "loss": 0.3664,
      "step": 1255
    },
    {
      "epoch": 0.1312,
      "grad_norm": 0.593521237373352,
      "learning_rate": 9.319617085964177e-06,
      "loss": 0.332,
      "step": 1256
    },
    {
      "epoch": 0.1314,
      "grad_norm": 0.6501487493515015,
      "learning_rate": 9.317858056830938e-06,
      "loss": 0.3803,
      "step": 1257
    },
    {
      "epoch": 0.1316,
      "grad_norm": 0.7193133234977722,
      "learning_rate": 9.316096923226135e-06,
      "loss": 0.3197,
      "step": 1258
    },
    {
      "epoch": 0.1318,
      "grad_norm": 0.525566041469574,
      "learning_rate": 9.314333686008125e-06,
      "loss": 0.3575,
      "step": 1259
    },
    {
      "epoch": 0.132,
      "grad_norm": 0.5966891050338745,
      "learning_rate": 9.312568346036288e-06,
      "loss": 0.3416,
      "step": 1260
    },
    {
      "epoch": 0.1322,
      "grad_norm": 0.5057774186134338,
      "learning_rate": 9.31080090417103e-06,
      "loss": 0.3503,
      "step": 1261
    },
    {
      "epoch": 0.1324,
      "grad_norm": 0.45291265845298767,
      "learning_rate": 9.309031361273775e-06,
      "loss": 0.3485,
      "step": 1262
    },
    {
      "epoch": 0.1326,
      "grad_norm": 0.5553573369979858,
      "learning_rate": 9.307259718206984e-06,
      "loss": 0.3432,
      "step": 1263
    },
    {
      "epoch": 0.1328,
      "grad_norm": 0.4663254916667938,
      "learning_rate": 9.305485975834132e-06,
      "loss": 0.3413,
      "step": 1264
    },
    {
      "epoch": 0.133,
      "grad_norm": 0.46281203627586365,
      "learning_rate": 9.30371013501972e-06,
      "loss": 0.3574,
      "step": 1265
    },
    {
      "epoch": 0.1332,
      "grad_norm": 0.4642482399940491,
      "learning_rate": 9.301932196629267e-06,
      "loss": 0.3357,
      "step": 1266
    },
    {
      "epoch": 0.1334,
      "grad_norm": 0.6357049942016602,
      "learning_rate": 9.300152161529325e-06,
      "loss": 0.3846,
      "step": 1267
    },
    {
      "epoch": 0.1336,
      "grad_norm": 0.4655270576477051,
      "learning_rate": 9.298370030587456e-06,
      "loss": 0.3287,
      "step": 1268
    },
    {
      "epoch": 0.1338,
      "grad_norm": 1.6271419525146484,
      "learning_rate": 9.296585804672253e-06,
      "loss": 0.339,
      "step": 1269
    },
    {
      "epoch": 0.134,
      "grad_norm": 0.6421656608581543,
      "learning_rate": 9.294799484653323e-06,
      "loss": 0.3292,
      "step": 1270
    },
    {
      "epoch": 0.1342,
      "grad_norm": 0.4701911509037018,
      "learning_rate": 9.293011071401299e-06,
      "loss": 0.357,
      "step": 1271
    },
    {
      "epoch": 0.1344,
      "grad_norm": 0.4839543104171753,
      "learning_rate": 9.291220565787829e-06,
      "loss": 0.392,
      "step": 1272
    },
    {
      "epoch": 0.1346,
      "grad_norm": 0.4690350890159607,
      "learning_rate": 9.289427968685588e-06,
      "loss": 0.3071,
      "step": 1273
    },
    {
      "epoch": 0.1348,
      "grad_norm": 0.7634577751159668,
      "learning_rate": 9.287633280968263e-06,
      "loss": 0.372,
      "step": 1274
    },
    {
      "epoch": 0.135,
      "grad_norm": 0.533515989780426,
      "learning_rate": 9.285836503510562e-06,
      "loss": 0.3905,
      "step": 1275
    },
    {
      "epoch": 0.1352,
      "grad_norm": 0.6451512575149536,
      "learning_rate": 9.284037637188215e-06,
      "loss": 0.3544,
      "step": 1276
    },
    {
      "epoch": 0.1354,
      "grad_norm": 0.8149439692497253,
      "learning_rate": 9.282236682877968e-06,
      "loss": 0.3516,
      "step": 1277
    },
    {
      "epoch": 0.1356,
      "grad_norm": 0.5862975716590881,
      "learning_rate": 9.280433641457582e-06,
      "loss": 0.3107,
      "step": 1278
    },
    {
      "epoch": 0.1358,
      "grad_norm": 0.5314748883247375,
      "learning_rate": 9.278628513805838e-06,
      "loss": 0.3436,
      "step": 1279
    },
    {
      "epoch": 0.136,
      "grad_norm": 0.5319241881370544,
      "learning_rate": 9.276821300802535e-06,
      "loss": 0.3856,
      "step": 1280
    },
    {
      "epoch": 0.1362,
      "grad_norm": 0.5185776948928833,
      "learning_rate": 9.275012003328483e-06,
      "loss": 0.3569,
      "step": 1281
    },
    {
      "epoch": 0.1364,
      "grad_norm": 0.49345412850379944,
      "learning_rate": 9.273200622265516e-06,
      "loss": 0.332,
      "step": 1282
    },
    {
      "epoch": 0.1366,
      "grad_norm": 0.43173748254776,
      "learning_rate": 9.271387158496477e-06,
      "loss": 0.3316,
      "step": 1283
    },
    {
      "epoch": 0.1368,
      "grad_norm": 0.5436722636222839,
      "learning_rate": 9.269571612905227e-06,
      "loss": 0.3495,
      "step": 1284
    },
    {
      "epoch": 0.137,
      "grad_norm": 0.6061972975730896,
      "learning_rate": 9.267753986376638e-06,
      "loss": 0.3907,
      "step": 1285
    },
    {
      "epoch": 0.1372,
      "grad_norm": 0.4670167863368988,
      "learning_rate": 9.265934279796602e-06,
      "loss": 0.3419,
      "step": 1286
    },
    {
      "epoch": 0.1374,
      "grad_norm": 0.5772005915641785,
      "learning_rate": 9.264112494052022e-06,
      "loss": 0.3615,
      "step": 1287
    },
    {
      "epoch": 0.1376,
      "grad_norm": 0.46336629986763,
      "learning_rate": 9.262288630030814e-06,
      "loss": 0.3866,
      "step": 1288
    },
    {
      "epoch": 0.1378,
      "grad_norm": 0.4881265461444855,
      "learning_rate": 9.260462688621906e-06,
      "loss": 0.3681,
      "step": 1289
    },
    {
      "epoch": 0.138,
      "grad_norm": 0.5249412059783936,
      "learning_rate": 9.25863467071524e-06,
      "loss": 0.3335,
      "step": 1290
    },
    {
      "epoch": 0.1382,
      "grad_norm": 0.6204243302345276,
      "learning_rate": 9.256804577201768e-06,
      "loss": 0.323,
      "step": 1291
    },
    {
      "epoch": 0.1384,
      "grad_norm": 0.5160402059555054,
      "learning_rate": 9.25497240897346e-06,
      "loss": 0.3362,
      "step": 1292
    },
    {
      "epoch": 0.1386,
      "grad_norm": 0.5926202535629272,
      "learning_rate": 9.25313816692329e-06,
      "loss": 0.356,
      "step": 1293
    },
    {
      "epoch": 0.1388,
      "grad_norm": 0.5431392192840576,
      "learning_rate": 9.251301851945244e-06,
      "loss": 0.3177,
      "step": 1294
    },
    {
      "epoch": 0.139,
      "grad_norm": 0.5239210724830627,
      "learning_rate": 9.24946346493432e-06,
      "loss": 0.3711,
      "step": 1295
    },
    {
      "epoch": 0.1392,
      "grad_norm": 0.5592888593673706,
      "learning_rate": 9.247623006786529e-06,
      "loss": 0.3454,
      "step": 1296
    },
    {
      "epoch": 0.1394,
      "grad_norm": 0.5783179998397827,
      "learning_rate": 9.245780478398883e-06,
      "loss": 0.3714,
      "step": 1297
    },
    {
      "epoch": 0.1396,
      "grad_norm": 0.4219837784767151,
      "learning_rate": 9.24393588066941e-06,
      "loss": 0.3592,
      "step": 1298
    },
    {
      "epoch": 0.1398,
      "grad_norm": 0.4187585413455963,
      "learning_rate": 9.242089214497146e-06,
      "loss": 0.3119,
      "step": 1299
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6546344757080078,
      "learning_rate": 9.24024048078213e-06,
      "loss": 0.3776,
      "step": 1300
    },
    {
      "epoch": 0.1402,
      "grad_norm": 0.5756825804710388,
      "learning_rate": 9.238389680425417e-06,
      "loss": 0.3416,
      "step": 1301
    },
    {
      "epoch": 0.1404,
      "grad_norm": 0.5387281179428101,
      "learning_rate": 9.236536814329062e-06,
      "loss": 0.3205,
      "step": 1302
    },
    {
      "epoch": 0.1406,
      "grad_norm": 0.492929071187973,
      "learning_rate": 9.234681883396129e-06,
      "loss": 0.3682,
      "step": 1303
    },
    {
      "epoch": 0.1408,
      "grad_norm": 0.5681840777397156,
      "learning_rate": 9.232824888530689e-06,
      "loss": 0.3479,
      "step": 1304
    },
    {
      "epoch": 0.141,
      "grad_norm": 0.8750943541526794,
      "learning_rate": 9.230965830637821e-06,
      "loss": 0.3232,
      "step": 1305
    },
    {
      "epoch": 0.1412,
      "grad_norm": 0.4198797941207886,
      "learning_rate": 9.229104710623604e-06,
      "loss": 0.3402,
      "step": 1306
    },
    {
      "epoch": 0.1414,
      "grad_norm": 0.49302542209625244,
      "learning_rate": 9.227241529395127e-06,
      "loss": 0.3732,
      "step": 1307
    },
    {
      "epoch": 0.1416,
      "grad_norm": 1.178342580795288,
      "learning_rate": 9.225376287860484e-06,
      "loss": 0.3411,
      "step": 1308
    },
    {
      "epoch": 0.1418,
      "grad_norm": 0.639506995677948,
      "learning_rate": 9.223508986928766e-06,
      "loss": 0.3566,
      "step": 1309
    },
    {
      "epoch": 0.142,
      "grad_norm": 0.6560541987419128,
      "learning_rate": 9.221639627510076e-06,
      "loss": 0.3362,
      "step": 1310
    },
    {
      "epoch": 0.1422,
      "grad_norm": 0.5089212656021118,
      "learning_rate": 9.219768210515518e-06,
      "loss": 0.4079,
      "step": 1311
    },
    {
      "epoch": 0.1424,
      "grad_norm": 0.4156581163406372,
      "learning_rate": 9.217894736857195e-06,
      "loss": 0.32,
      "step": 1312
    },
    {
      "epoch": 0.1426,
      "grad_norm": 0.5293648838996887,
      "learning_rate": 9.216019207448216e-06,
      "loss": 0.365,
      "step": 1313
    },
    {
      "epoch": 0.1428,
      "grad_norm": 1.304078459739685,
      "learning_rate": 9.214141623202694e-06,
      "loss": 0.3474,
      "step": 1314
    },
    {
      "epoch": 0.143,
      "grad_norm": 0.4617994427680969,
      "learning_rate": 9.21226198503574e-06,
      "loss": 0.3599,
      "step": 1315
    },
    {
      "epoch": 0.1432,
      "grad_norm": 0.4395681321620941,
      "learning_rate": 9.210380293863462e-06,
      "loss": 0.3306,
      "step": 1316
    },
    {
      "epoch": 0.1434,
      "grad_norm": 0.5131776332855225,
      "learning_rate": 9.208496550602979e-06,
      "loss": 0.3293,
      "step": 1317
    },
    {
      "epoch": 0.1436,
      "grad_norm": 0.5541388988494873,
      "learning_rate": 9.206610756172402e-06,
      "loss": 0.3647,
      "step": 1318
    },
    {
      "epoch": 0.1438,
      "grad_norm": 0.49822744727134705,
      "learning_rate": 9.204722911490847e-06,
      "loss": 0.3661,
      "step": 1319
    },
    {
      "epoch": 0.144,
      "grad_norm": 0.5002879500389099,
      "learning_rate": 9.202833017478421e-06,
      "loss": 0.3373,
      "step": 1320
    },
    {
      "epoch": 0.1442,
      "grad_norm": 0.4681200385093689,
      "learning_rate": 9.200941075056242e-06,
      "loss": 0.3052,
      "step": 1321
    },
    {
      "epoch": 0.1444,
      "grad_norm": 0.5369657278060913,
      "learning_rate": 9.199047085146415e-06,
      "loss": 0.3321,
      "step": 1322
    },
    {
      "epoch": 0.1446,
      "grad_norm": 0.5156526565551758,
      "learning_rate": 9.197151048672051e-06,
      "loss": 0.3832,
      "step": 1323
    },
    {
      "epoch": 0.1448,
      "grad_norm": 0.4632044732570648,
      "learning_rate": 9.195252966557252e-06,
      "loss": 0.3337,
      "step": 1324
    },
    {
      "epoch": 0.145,
      "grad_norm": 0.4625380337238312,
      "learning_rate": 9.193352839727122e-06,
      "loss": 0.3656,
      "step": 1325
    },
    {
      "epoch": 0.1452,
      "grad_norm": 0.5683677196502686,
      "learning_rate": 9.191450669107758e-06,
      "loss": 0.3564,
      "step": 1326
    },
    {
      "epoch": 0.1454,
      "grad_norm": 0.48665279150009155,
      "learning_rate": 9.189546455626258e-06,
      "loss": 0.339,
      "step": 1327
    },
    {
      "epoch": 0.1456,
      "grad_norm": 0.5244611501693726,
      "learning_rate": 9.18764020021071e-06,
      "loss": 0.3473,
      "step": 1328
    },
    {
      "epoch": 0.1458,
      "grad_norm": 0.7250415682792664,
      "learning_rate": 9.1857319037902e-06,
      "loss": 0.3357,
      "step": 1329
    },
    {
      "epoch": 0.146,
      "grad_norm": 0.5230187177658081,
      "learning_rate": 9.18382156729481e-06,
      "loss": 0.3388,
      "step": 1330
    },
    {
      "epoch": 0.1462,
      "grad_norm": 0.5243784785270691,
      "learning_rate": 9.181909191655613e-06,
      "loss": 0.3677,
      "step": 1331
    },
    {
      "epoch": 0.1464,
      "grad_norm": 0.5332574844360352,
      "learning_rate": 9.179994777804677e-06,
      "loss": 0.3292,
      "step": 1332
    },
    {
      "epoch": 0.1466,
      "grad_norm": 0.4487501084804535,
      "learning_rate": 9.178078326675069e-06,
      "loss": 0.3119,
      "step": 1333
    },
    {
      "epoch": 0.1468,
      "grad_norm": 0.4659167528152466,
      "learning_rate": 9.176159839200838e-06,
      "loss": 0.3579,
      "step": 1334
    },
    {
      "epoch": 0.147,
      "grad_norm": 0.5439627766609192,
      "learning_rate": 9.174239316317034e-06,
      "loss": 0.4099,
      "step": 1335
    },
    {
      "epoch": 0.1472,
      "grad_norm": 0.6244337558746338,
      "learning_rate": 9.172316758959695e-06,
      "loss": 0.3599,
      "step": 1336
    },
    {
      "epoch": 0.1474,
      "grad_norm": 0.48105597496032715,
      "learning_rate": 9.170392168065858e-06,
      "loss": 0.3365,
      "step": 1337
    },
    {
      "epoch": 0.1476,
      "grad_norm": 0.7514434456825256,
      "learning_rate": 9.168465544573538e-06,
      "loss": 0.3676,
      "step": 1338
    },
    {
      "epoch": 0.1478,
      "grad_norm": 0.4782997965812683,
      "learning_rate": 9.16653688942175e-06,
      "loss": 0.3272,
      "step": 1339
    },
    {
      "epoch": 0.148,
      "grad_norm": 0.45920053124427795,
      "learning_rate": 9.164606203550498e-06,
      "loss": 0.3215,
      "step": 1340
    },
    {
      "epoch": 0.1482,
      "grad_norm": 0.4409297704696655,
      "learning_rate": 9.162673487900775e-06,
      "loss": 0.3289,
      "step": 1341
    },
    {
      "epoch": 0.1484,
      "grad_norm": 0.5328016877174377,
      "learning_rate": 9.160738743414564e-06,
      "loss": 0.3586,
      "step": 1342
    },
    {
      "epoch": 0.1486,
      "grad_norm": 0.5985413789749146,
      "learning_rate": 9.158801971034832e-06,
      "loss": 0.3311,
      "step": 1343
    },
    {
      "epoch": 0.1488,
      "grad_norm": 0.4521832764148712,
      "learning_rate": 9.156863171705543e-06,
      "loss": 0.3369,
      "step": 1344
    },
    {
      "epoch": 0.149,
      "grad_norm": 0.5994557738304138,
      "learning_rate": 9.154922346371641e-06,
      "loss": 0.3056,
      "step": 1345
    },
    {
      "epoch": 0.1492,
      "grad_norm": 0.52101069688797,
      "learning_rate": 9.152979495979064e-06,
      "loss": 0.3287,
      "step": 1346
    },
    {
      "epoch": 0.1494,
      "grad_norm": 0.7565995454788208,
      "learning_rate": 9.15103462147473e-06,
      "loss": 0.3644,
      "step": 1347
    },
    {
      "epoch": 0.1496,
      "grad_norm": 0.45401111245155334,
      "learning_rate": 9.14908772380655e-06,
      "loss": 0.3204,
      "step": 1348
    },
    {
      "epoch": 0.1498,
      "grad_norm": 0.4168843626976013,
      "learning_rate": 9.147138803923417e-06,
      "loss": 0.3119,
      "step": 1349
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.40914469957351685,
      "learning_rate": 9.145187862775208e-06,
      "loss": 0.3156,
      "step": 1350
    },
    {
      "epoch": 0.1502,
      "grad_norm": 0.47215166687965393,
      "learning_rate": 9.143234901312794e-06,
      "loss": 0.3472,
      "step": 1351
    },
    {
      "epoch": 0.1504,
      "grad_norm": 0.5127692818641663,
      "learning_rate": 9.141279920488021e-06,
      "loss": 0.3468,
      "step": 1352
    },
    {
      "epoch": 0.1506,
      "grad_norm": 0.49893131852149963,
      "learning_rate": 9.139322921253724e-06,
      "loss": 0.3525,
      "step": 1353
    },
    {
      "epoch": 0.1508,
      "grad_norm": 0.5732973217964172,
      "learning_rate": 9.13736390456372e-06,
      "loss": 0.3414,
      "step": 1354
    },
    {
      "epoch": 0.151,
      "grad_norm": 0.5180595517158508,
      "learning_rate": 9.13540287137281e-06,
      "loss": 0.3506,
      "step": 1355
    },
    {
      "epoch": 0.1512,
      "grad_norm": 0.5655452609062195,
      "learning_rate": 9.133439822636779e-06,
      "loss": 0.3297,
      "step": 1356
    },
    {
      "epoch": 0.1514,
      "grad_norm": 0.47065040469169617,
      "learning_rate": 9.13147475931239e-06,
      "loss": 0.3351,
      "step": 1357
    },
    {
      "epoch": 0.1516,
      "grad_norm": 0.4717150628566742,
      "learning_rate": 9.129507682357393e-06,
      "loss": 0.3536,
      "step": 1358
    },
    {
      "epoch": 0.1518,
      "grad_norm": 0.5047354698181152,
      "learning_rate": 9.12753859273052e-06,
      "loss": 0.3332,
      "step": 1359
    },
    {
      "epoch": 0.152,
      "grad_norm": 0.5332925915718079,
      "learning_rate": 9.125567491391476e-06,
      "loss": 0.3385,
      "step": 1360
    },
    {
      "epoch": 0.1522,
      "grad_norm": 0.4627508223056793,
      "learning_rate": 9.123594379300956e-06,
      "loss": 0.33,
      "step": 1361
    },
    {
      "epoch": 0.1524,
      "grad_norm": 0.4534182846546173,
      "learning_rate": 9.12161925742063e-06,
      "loss": 0.3819,
      "step": 1362
    },
    {
      "epoch": 0.1526,
      "grad_norm": 0.5553590655326843,
      "learning_rate": 9.119642126713147e-06,
      "loss": 0.3739,
      "step": 1363
    },
    {
      "epoch": 0.1528,
      "grad_norm": 0.5967604517936707,
      "learning_rate": 9.117662988142138e-06,
      "loss": 0.3902,
      "step": 1364
    },
    {
      "epoch": 0.153,
      "grad_norm": 0.48401570320129395,
      "learning_rate": 9.115681842672211e-06,
      "loss": 0.3535,
      "step": 1365
    },
    {
      "epoch": 0.1532,
      "grad_norm": 0.48710787296295166,
      "learning_rate": 9.11369869126895e-06,
      "loss": 0.369,
      "step": 1366
    },
    {
      "epoch": 0.1534,
      "grad_norm": 0.6741750836372375,
      "learning_rate": 9.111713534898923e-06,
      "loss": 0.3923,
      "step": 1367
    },
    {
      "epoch": 0.1536,
      "grad_norm": 0.5114241242408752,
      "learning_rate": 9.109726374529666e-06,
      "loss": 0.344,
      "step": 1368
    },
    {
      "epoch": 0.1538,
      "grad_norm": 0.47057589888572693,
      "learning_rate": 9.107737211129702e-06,
      "loss": 0.3504,
      "step": 1369
    },
    {
      "epoch": 0.154,
      "grad_norm": 0.5867481827735901,
      "learning_rate": 9.10574604566852e-06,
      "loss": 0.3545,
      "step": 1370
    },
    {
      "epoch": 0.1542,
      "grad_norm": 0.6643583178520203,
      "learning_rate": 9.103752879116595e-06,
      "loss": 0.3523,
      "step": 1371
    },
    {
      "epoch": 0.1544,
      "grad_norm": 0.5818706750869751,
      "learning_rate": 9.101757712445369e-06,
      "loss": 0.3088,
      "step": 1372
    },
    {
      "epoch": 0.1546,
      "grad_norm": 0.5499979257583618,
      "learning_rate": 9.099760546627262e-06,
      "loss": 0.352,
      "step": 1373
    },
    {
      "epoch": 0.1548,
      "grad_norm": 0.5359256267547607,
      "learning_rate": 9.09776138263567e-06,
      "loss": 0.3633,
      "step": 1374
    },
    {
      "epoch": 0.155,
      "grad_norm": 0.41454753279685974,
      "learning_rate": 9.09576022144496e-06,
      "loss": 0.3259,
      "step": 1375
    },
    {
      "epoch": 0.1552,
      "grad_norm": 0.4824369549751282,
      "learning_rate": 9.093757064030473e-06,
      "loss": 0.3193,
      "step": 1376
    },
    {
      "epoch": 0.1554,
      "grad_norm": 0.5250616073608398,
      "learning_rate": 9.091751911368524e-06,
      "loss": 0.3364,
      "step": 1377
    },
    {
      "epoch": 0.1556,
      "grad_norm": 0.6212247610092163,
      "learning_rate": 9.089744764436404e-06,
      "loss": 0.3796,
      "step": 1378
    },
    {
      "epoch": 0.1558,
      "grad_norm": 0.6382238268852234,
      "learning_rate": 9.087735624212365e-06,
      "loss": 0.3726,
      "step": 1379
    },
    {
      "epoch": 0.156,
      "grad_norm": 0.5789822340011597,
      "learning_rate": 9.085724491675642e-06,
      "loss": 0.3291,
      "step": 1380
    },
    {
      "epoch": 0.1562,
      "grad_norm": 0.5420401692390442,
      "learning_rate": 9.083711367806438e-06,
      "loss": 0.3726,
      "step": 1381
    },
    {
      "epoch": 0.1564,
      "grad_norm": 0.5666664242744446,
      "learning_rate": 9.08169625358592e-06,
      "loss": 0.3798,
      "step": 1382
    },
    {
      "epoch": 0.1566,
      "grad_norm": 0.5384038686752319,
      "learning_rate": 9.079679149996235e-06,
      "loss": 0.3433,
      "step": 1383
    },
    {
      "epoch": 0.1568,
      "grad_norm": 0.5053364038467407,
      "learning_rate": 9.077660058020492e-06,
      "loss": 0.3194,
      "step": 1384
    },
    {
      "epoch": 0.157,
      "grad_norm": 0.8359315991401672,
      "learning_rate": 9.07563897864277e-06,
      "loss": 0.3819,
      "step": 1385
    },
    {
      "epoch": 0.1572,
      "grad_norm": 0.515372633934021,
      "learning_rate": 9.073615912848126e-06,
      "loss": 0.3394,
      "step": 1386
    },
    {
      "epoch": 0.1574,
      "grad_norm": 0.501024067401886,
      "learning_rate": 9.07159086162257e-06,
      "loss": 0.3409,
      "step": 1387
    },
    {
      "epoch": 0.1576,
      "grad_norm": 0.4902344048023224,
      "learning_rate": 9.069563825953092e-06,
      "loss": 0.3692,
      "step": 1388
    },
    {
      "epoch": 0.1578,
      "grad_norm": 0.5104795694351196,
      "learning_rate": 9.06753480682764e-06,
      "loss": 0.3359,
      "step": 1389
    },
    {
      "epoch": 0.158,
      "grad_norm": 0.5503941178321838,
      "learning_rate": 9.065503805235139e-06,
      "loss": 0.3495,
      "step": 1390
    },
    {
      "epoch": 0.1582,
      "grad_norm": 0.5004388093948364,
      "learning_rate": 9.06347082216547e-06,
      "loss": 0.3467,
      "step": 1391
    },
    {
      "epoch": 0.1584,
      "grad_norm": 1.2659671306610107,
      "learning_rate": 9.061435858609486e-06,
      "loss": 0.3429,
      "step": 1392
    },
    {
      "epoch": 0.1586,
      "grad_norm": 0.5562512278556824,
      "learning_rate": 9.059398915559005e-06,
      "loss": 0.3598,
      "step": 1393
    },
    {
      "epoch": 0.1588,
      "grad_norm": 0.523776113986969,
      "learning_rate": 9.057359994006806e-06,
      "loss": 0.3663,
      "step": 1394
    },
    {
      "epoch": 0.159,
      "grad_norm": 1.3524843454360962,
      "learning_rate": 9.055319094946633e-06,
      "loss": 0.3486,
      "step": 1395
    },
    {
      "epoch": 0.1592,
      "grad_norm": 0.5289422273635864,
      "learning_rate": 9.0532762193732e-06,
      "loss": 0.3744,
      "step": 1396
    },
    {
      "epoch": 0.1594,
      "grad_norm": 0.46465757489204407,
      "learning_rate": 9.051231368282177e-06,
      "loss": 0.3534,
      "step": 1397
    },
    {
      "epoch": 0.1596,
      "grad_norm": 0.5002828240394592,
      "learning_rate": 9.0491845426702e-06,
      "loss": 0.3421,
      "step": 1398
    },
    {
      "epoch": 0.1598,
      "grad_norm": 0.49576571583747864,
      "learning_rate": 9.047135743534866e-06,
      "loss": 0.3926,
      "step": 1399
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.4967557489871979,
      "learning_rate": 9.045084971874738e-06,
      "loss": 0.3588,
      "step": 1400
    },
    {
      "epoch": 0.1602,
      "grad_norm": 0.6993545889854431,
      "learning_rate": 9.043032228689333e-06,
      "loss": 0.3229,
      "step": 1401
    },
    {
      "epoch": 0.1604,
      "grad_norm": 0.5397869348526001,
      "learning_rate": 9.040977514979136e-06,
      "loss": 0.3609,
      "step": 1402
    },
    {
      "epoch": 0.1606,
      "grad_norm": 0.45879480242729187,
      "learning_rate": 9.038920831745587e-06,
      "loss": 0.3532,
      "step": 1403
    },
    {
      "epoch": 0.1608,
      "grad_norm": 0.4710633158683777,
      "learning_rate": 9.036862179991092e-06,
      "loss": 0.3335,
      "step": 1404
    },
    {
      "epoch": 0.161,
      "grad_norm": 0.5307506322860718,
      "learning_rate": 9.03480156071901e-06,
      "loss": 0.3487,
      "step": 1405
    },
    {
      "epoch": 0.1612,
      "grad_norm": 0.4840256869792938,
      "learning_rate": 9.032738974933663e-06,
      "loss": 0.3556,
      "step": 1406
    },
    {
      "epoch": 0.1614,
      "grad_norm": 0.6070824861526489,
      "learning_rate": 9.03067442364033e-06,
      "loss": 0.3391,
      "step": 1407
    },
    {
      "epoch": 0.1616,
      "grad_norm": 0.46480968594551086,
      "learning_rate": 9.028607907845247e-06,
      "loss": 0.3551,
      "step": 1408
    },
    {
      "epoch": 0.1618,
      "grad_norm": 0.6158547401428223,
      "learning_rate": 9.026539428555609e-06,
      "loss": 0.3634,
      "step": 1409
    },
    {
      "epoch": 0.162,
      "grad_norm": 0.43423640727996826,
      "learning_rate": 9.02446898677957e-06,
      "loss": 0.373,
      "step": 1410
    },
    {
      "epoch": 0.1622,
      "grad_norm": 0.46258604526519775,
      "learning_rate": 9.022396583526238e-06,
      "loss": 0.3531,
      "step": 1411
    },
    {
      "epoch": 0.1624,
      "grad_norm": 0.6833035349845886,
      "learning_rate": 9.020322219805674e-06,
      "loss": 0.3283,
      "step": 1412
    },
    {
      "epoch": 0.1626,
      "grad_norm": 0.416453093290329,
      "learning_rate": 9.0182458966289e-06,
      "loss": 0.3393,
      "step": 1413
    },
    {
      "epoch": 0.1628,
      "grad_norm": 0.8942244648933411,
      "learning_rate": 9.01616761500789e-06,
      "loss": 0.3795,
      "step": 1414
    },
    {
      "epoch": 0.163,
      "grad_norm": 0.5765861868858337,
      "learning_rate": 9.014087375955574e-06,
      "loss": 0.3406,
      "step": 1415
    },
    {
      "epoch": 0.1632,
      "grad_norm": 0.5331302881240845,
      "learning_rate": 9.012005180485834e-06,
      "loss": 0.335,
      "step": 1416
    },
    {
      "epoch": 0.1634,
      "grad_norm": 0.7149733304977417,
      "learning_rate": 9.009921029613506e-06,
      "loss": 0.3376,
      "step": 1417
    },
    {
      "epoch": 0.1636,
      "grad_norm": 0.5127587914466858,
      "learning_rate": 9.007834924354384e-06,
      "loss": 0.3692,
      "step": 1418
    },
    {
      "epoch": 0.1638,
      "grad_norm": 0.4745359718799591,
      "learning_rate": 9.005746865725206e-06,
      "loss": 0.355,
      "step": 1419
    },
    {
      "epoch": 0.164,
      "grad_norm": 0.4405989944934845,
      "learning_rate": 9.003656854743667e-06,
      "loss": 0.3231,
      "step": 1420
    },
    {
      "epoch": 0.1642,
      "grad_norm": 0.44121667742729187,
      "learning_rate": 9.001564892428416e-06,
      "loss": 0.3175,
      "step": 1421
    },
    {
      "epoch": 0.1644,
      "grad_norm": 0.5551000237464905,
      "learning_rate": 8.999470979799048e-06,
      "loss": 0.3585,
      "step": 1422
    },
    {
      "epoch": 0.1646,
      "grad_norm": 0.41031062602996826,
      "learning_rate": 8.99737511787611e-06,
      "loss": 0.3368,
      "step": 1423
    },
    {
      "epoch": 0.1648,
      "grad_norm": 0.5918021202087402,
      "learning_rate": 8.9952773076811e-06,
      "loss": 0.3679,
      "step": 1424
    },
    {
      "epoch": 0.165,
      "grad_norm": 0.44656097888946533,
      "learning_rate": 8.993177550236464e-06,
      "loss": 0.3052,
      "step": 1425
    },
    {
      "epoch": 0.1652,
      "grad_norm": 0.48767155408859253,
      "learning_rate": 8.991075846565603e-06,
      "loss": 0.3651,
      "step": 1426
    },
    {
      "epoch": 0.1654,
      "grad_norm": 0.7927563190460205,
      "learning_rate": 8.988972197692857e-06,
      "loss": 0.3415,
      "step": 1427
    },
    {
      "epoch": 0.1656,
      "grad_norm": 0.5172145366668701,
      "learning_rate": 8.986866604643518e-06,
      "loss": 0.3502,
      "step": 1428
    },
    {
      "epoch": 0.1658,
      "grad_norm": 0.5324118137359619,
      "learning_rate": 8.984759068443832e-06,
      "loss": 0.3487,
      "step": 1429
    },
    {
      "epoch": 0.166,
      "grad_norm": 0.598042905330658,
      "learning_rate": 8.982649590120982e-06,
      "loss": 0.3512,
      "step": 1430
    },
    {
      "epoch": 0.1662,
      "grad_norm": 0.5054885149002075,
      "learning_rate": 8.980538170703104e-06,
      "loss": 0.3489,
      "step": 1431
    },
    {
      "epoch": 0.1664,
      "grad_norm": 0.5864490270614624,
      "learning_rate": 8.978424811219277e-06,
      "loss": 0.3779,
      "step": 1432
    },
    {
      "epoch": 0.1666,
      "grad_norm": 0.5326104760169983,
      "learning_rate": 8.97630951269953e-06,
      "loss": 0.3763,
      "step": 1433
    },
    {
      "epoch": 0.1668,
      "grad_norm": 0.4559936225414276,
      "learning_rate": 8.97419227617483e-06,
      "loss": 0.3661,
      "step": 1434
    },
    {
      "epoch": 0.167,
      "grad_norm": 0.45844346284866333,
      "learning_rate": 8.972073102677091e-06,
      "loss": 0.3576,
      "step": 1435
    },
    {
      "epoch": 0.1672,
      "grad_norm": 1.2076220512390137,
      "learning_rate": 8.969951993239177e-06,
      "loss": 0.3815,
      "step": 1436
    },
    {
      "epoch": 0.1674,
      "grad_norm": 0.5038664937019348,
      "learning_rate": 8.96782894889489e-06,
      "loss": 0.3508,
      "step": 1437
    },
    {
      "epoch": 0.1676,
      "grad_norm": 0.45797279477119446,
      "learning_rate": 8.965703970678974e-06,
      "loss": 0.3269,
      "step": 1438
    },
    {
      "epoch": 0.1678,
      "grad_norm": 0.5749474167823792,
      "learning_rate": 8.963577059627117e-06,
      "loss": 0.3543,
      "step": 1439
    },
    {
      "epoch": 0.168,
      "grad_norm": 0.4895762503147125,
      "learning_rate": 8.961448216775955e-06,
      "loss": 0.3441,
      "step": 1440
    },
    {
      "epoch": 0.1682,
      "grad_norm": 0.5816234350204468,
      "learning_rate": 8.959317443163054e-06,
      "loss": 0.3653,
      "step": 1441
    },
    {
      "epoch": 0.1684,
      "grad_norm": 0.491860568523407,
      "learning_rate": 8.957184739826929e-06,
      "loss": 0.3407,
      "step": 1442
    },
    {
      "epoch": 0.1686,
      "grad_norm": 0.517346203327179,
      "learning_rate": 8.955050107807035e-06,
      "loss": 0.3384,
      "step": 1443
    },
    {
      "epoch": 0.1688,
      "grad_norm": 0.5029215812683105,
      "learning_rate": 8.952913548143766e-06,
      "loss": 0.3478,
      "step": 1444
    },
    {
      "epoch": 0.169,
      "grad_norm": 0.514972448348999,
      "learning_rate": 8.950775061878453e-06,
      "loss": 0.353,
      "step": 1445
    },
    {
      "epoch": 0.1692,
      "grad_norm": 0.44176697731018066,
      "learning_rate": 8.94863465005337e-06,
      "loss": 0.3048,
      "step": 1446
    },
    {
      "epoch": 0.1694,
      "grad_norm": 0.4624730050563812,
      "learning_rate": 8.946492313711725e-06,
      "loss": 0.317,
      "step": 1447
    },
    {
      "epoch": 0.1696,
      "grad_norm": 0.5126705765724182,
      "learning_rate": 8.944348053897672e-06,
      "loss": 0.3489,
      "step": 1448
    },
    {
      "epoch": 0.1698,
      "grad_norm": 0.543292760848999,
      "learning_rate": 8.942201871656292e-06,
      "loss": 0.3706,
      "step": 1449
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.46403396129608154,
      "learning_rate": 8.94005376803361e-06,
      "loss": 0.3422,
      "step": 1450
    },
    {
      "epoch": 0.1702,
      "grad_norm": 0.5027287006378174,
      "learning_rate": 8.937903744076587e-06,
      "loss": 0.3358,
      "step": 1451
    },
    {
      "epoch": 0.1704,
      "grad_norm": 0.4930799603462219,
      "learning_rate": 8.935751800833117e-06,
      "loss": 0.3528,
      "step": 1452
    },
    {
      "epoch": 0.1706,
      "grad_norm": 0.4035414755344391,
      "learning_rate": 8.933597939352031e-06,
      "loss": 0.3549,
      "step": 1453
    },
    {
      "epoch": 0.1708,
      "grad_norm": 0.5606353282928467,
      "learning_rate": 8.931442160683094e-06,
      "loss": 0.3606,
      "step": 1454
    },
    {
      "epoch": 0.171,
      "grad_norm": 0.4489575922489166,
      "learning_rate": 8.92928446587701e-06,
      "loss": 0.3567,
      "step": 1455
    },
    {
      "epoch": 0.1712,
      "grad_norm": 0.5807196497917175,
      "learning_rate": 8.92712485598541e-06,
      "loss": 0.3559,
      "step": 1456
    },
    {
      "epoch": 0.1714,
      "grad_norm": 0.5539586544036865,
      "learning_rate": 8.924963332060863e-06,
      "loss": 0.3403,
      "step": 1457
    },
    {
      "epoch": 0.1716,
      "grad_norm": 0.44856351613998413,
      "learning_rate": 8.922799895156868e-06,
      "loss": 0.3282,
      "step": 1458
    },
    {
      "epoch": 0.1718,
      "grad_norm": 0.663634181022644,
      "learning_rate": 8.920634546327857e-06,
      "loss": 0.3548,
      "step": 1459
    },
    {
      "epoch": 0.172,
      "grad_norm": 0.5596919059753418,
      "learning_rate": 8.9184672866292e-06,
      "loss": 0.3561,
      "step": 1460
    },
    {
      "epoch": 0.1722,
      "grad_norm": 0.5033104419708252,
      "learning_rate": 8.916298117117188e-06,
      "loss": 0.3444,
      "step": 1461
    },
    {
      "epoch": 0.1724,
      "grad_norm": 0.533202588558197,
      "learning_rate": 8.91412703884905e-06,
      "loss": 0.358,
      "step": 1462
    },
    {
      "epoch": 0.1726,
      "grad_norm": 0.5246691107749939,
      "learning_rate": 8.911954052882941e-06,
      "loss": 0.3229,
      "step": 1463
    },
    {
      "epoch": 0.1728,
      "grad_norm": 0.4379603862762451,
      "learning_rate": 8.909779160277951e-06,
      "loss": 0.3148,
      "step": 1464
    },
    {
      "epoch": 0.173,
      "grad_norm": 0.5789588093757629,
      "learning_rate": 8.907602362094094e-06,
      "loss": 0.3107,
      "step": 1465
    },
    {
      "epoch": 0.1732,
      "grad_norm": 0.42621514201164246,
      "learning_rate": 8.905423659392316e-06,
      "loss": 0.3801,
      "step": 1466
    },
    {
      "epoch": 0.1734,
      "grad_norm": 0.5371299386024475,
      "learning_rate": 8.903243053234492e-06,
      "loss": 0.3428,
      "step": 1467
    },
    {
      "epoch": 0.1736,
      "grad_norm": 0.5730862021446228,
      "learning_rate": 8.90106054468342e-06,
      "loss": 0.3445,
      "step": 1468
    },
    {
      "epoch": 0.1738,
      "grad_norm": 0.8014522790908813,
      "learning_rate": 8.898876134802827e-06,
      "loss": 0.3773,
      "step": 1469
    },
    {
      "epoch": 0.174,
      "grad_norm": 0.46499118208885193,
      "learning_rate": 8.896689824657371e-06,
      "loss": 0.3402,
      "step": 1470
    },
    {
      "epoch": 0.1742,
      "grad_norm": 0.5199479460716248,
      "learning_rate": 8.894501615312633e-06,
      "loss": 0.3008,
      "step": 1471
    },
    {
      "epoch": 0.1744,
      "grad_norm": 0.5163127183914185,
      "learning_rate": 8.892311507835118e-06,
      "loss": 0.3104,
      "step": 1472
    },
    {
      "epoch": 0.1746,
      "grad_norm": 0.46663767099380493,
      "learning_rate": 8.890119503292258e-06,
      "loss": 0.3257,
      "step": 1473
    },
    {
      "epoch": 0.1748,
      "grad_norm": 0.5678489208221436,
      "learning_rate": 8.887925602752411e-06,
      "loss": 0.3617,
      "step": 1474
    },
    {
      "epoch": 0.175,
      "grad_norm": 0.4474494755268097,
      "learning_rate": 8.885729807284855e-06,
      "loss": 0.3393,
      "step": 1475
    },
    {
      "epoch": 0.1752,
      "grad_norm": 0.4421643018722534,
      "learning_rate": 8.883532117959797e-06,
      "loss": 0.3261,
      "step": 1476
    },
    {
      "epoch": 0.1754,
      "grad_norm": 0.7208912372589111,
      "learning_rate": 8.88133253584836e-06,
      "loss": 0.351,
      "step": 1477
    },
    {
      "epoch": 0.1756,
      "grad_norm": 0.6244512796401978,
      "learning_rate": 8.879131062022598e-06,
      "loss": 0.3475,
      "step": 1478
    },
    {
      "epoch": 0.1758,
      "grad_norm": 0.5000828504562378,
      "learning_rate": 8.87692769755548e-06,
      "loss": 0.3781,
      "step": 1479
    },
    {
      "epoch": 0.176,
      "grad_norm": 1.9115983247756958,
      "learning_rate": 8.874722443520898e-06,
      "loss": 0.3171,
      "step": 1480
    },
    {
      "epoch": 0.1762,
      "grad_norm": 0.5985187888145447,
      "learning_rate": 8.872515300993669e-06,
      "loss": 0.3385,
      "step": 1481
    },
    {
      "epoch": 0.1764,
      "grad_norm": 0.4714076817035675,
      "learning_rate": 8.870306271049527e-06,
      "loss": 0.3339,
      "step": 1482
    },
    {
      "epoch": 0.1766,
      "grad_norm": 0.4844547212123871,
      "learning_rate": 8.868095354765125e-06,
      "loss": 0.3343,
      "step": 1483
    },
    {
      "epoch": 0.1768,
      "grad_norm": 0.4193803668022156,
      "learning_rate": 8.865882553218036e-06,
      "loss": 0.3306,
      "step": 1484
    },
    {
      "epoch": 0.177,
      "grad_norm": 0.5723409652709961,
      "learning_rate": 8.863667867486756e-06,
      "loss": 0.3559,
      "step": 1485
    },
    {
      "epoch": 0.1772,
      "grad_norm": 0.49045658111572266,
      "learning_rate": 8.861451298650692e-06,
      "loss": 0.3349,
      "step": 1486
    },
    {
      "epoch": 0.1774,
      "grad_norm": 0.5560157895088196,
      "learning_rate": 8.859232847790175e-06,
      "loss": 0.3321,
      "step": 1487
    },
    {
      "epoch": 0.1776,
      "grad_norm": 0.7927626967430115,
      "learning_rate": 8.857012515986452e-06,
      "loss": 0.3505,
      "step": 1488
    },
    {
      "epoch": 0.1778,
      "grad_norm": 0.5235140919685364,
      "learning_rate": 8.854790304321682e-06,
      "loss": 0.368,
      "step": 1489
    },
    {
      "epoch": 0.178,
      "grad_norm": 0.6090984344482422,
      "learning_rate": 8.852566213878947e-06,
      "loss": 0.3455,
      "step": 1490
    },
    {
      "epoch": 0.1782,
      "grad_norm": 0.49945929646492004,
      "learning_rate": 8.85034024574224e-06,
      "loss": 0.3406,
      "step": 1491
    },
    {
      "epoch": 0.1784,
      "grad_norm": 0.5901384353637695,
      "learning_rate": 8.848112400996473e-06,
      "loss": 0.3441,
      "step": 1492
    },
    {
      "epoch": 0.1786,
      "grad_norm": 0.5020262002944946,
      "learning_rate": 8.84588268072747e-06,
      "loss": 0.3474,
      "step": 1493
    },
    {
      "epoch": 0.1788,
      "grad_norm": 0.44792965054512024,
      "learning_rate": 8.843651086021966e-06,
      "loss": 0.329,
      "step": 1494
    },
    {
      "epoch": 0.179,
      "grad_norm": 0.5115306377410889,
      "learning_rate": 8.841417617967618e-06,
      "loss": 0.3499,
      "step": 1495
    },
    {
      "epoch": 0.1792,
      "grad_norm": 0.5791764855384827,
      "learning_rate": 8.83918227765299e-06,
      "loss": 0.3597,
      "step": 1496
    },
    {
      "epoch": 0.1794,
      "grad_norm": 0.43535682559013367,
      "learning_rate": 8.836945066167556e-06,
      "loss": 0.2931,
      "step": 1497
    },
    {
      "epoch": 0.1796,
      "grad_norm": 0.5271389484405518,
      "learning_rate": 8.834705984601708e-06,
      "loss": 0.3,
      "step": 1498
    },
    {
      "epoch": 0.1798,
      "grad_norm": 0.5324931740760803,
      "learning_rate": 8.83246503404675e-06,
      "loss": 0.3427,
      "step": 1499
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.7538367509841919,
      "learning_rate": 8.83022221559489e-06,
      "loss": 0.3547,
      "step": 1500
    },
    {
      "epoch": 0.1802,
      "grad_norm": 0.37248221039772034,
      "learning_rate": 8.827977530339254e-06,
      "loss": 0.331,
      "step": 1501
    },
    {
      "epoch": 0.1804,
      "grad_norm": 0.6131284236907959,
      "learning_rate": 8.825730979373873e-06,
      "loss": 0.3468,
      "step": 1502
    },
    {
      "epoch": 0.1806,
      "grad_norm": 0.5191988945007324,
      "learning_rate": 8.823482563793687e-06,
      "loss": 0.3369,
      "step": 1503
    },
    {
      "epoch": 0.1808,
      "grad_norm": 0.5966916084289551,
      "learning_rate": 8.821232284694545e-06,
      "loss": 0.373,
      "step": 1504
    },
    {
      "epoch": 0.181,
      "grad_norm": 0.429352343082428,
      "learning_rate": 8.818980143173212e-06,
      "loss": 0.3408,
      "step": 1505
    },
    {
      "epoch": 0.1812,
      "grad_norm": 0.4804506301879883,
      "learning_rate": 8.81672614032735e-06,
      "loss": 0.339,
      "step": 1506
    },
    {
      "epoch": 0.1814,
      "grad_norm": 0.5173071026802063,
      "learning_rate": 8.814470277255532e-06,
      "loss": 0.3729,
      "step": 1507
    },
    {
      "epoch": 0.1816,
      "grad_norm": 0.48913249373435974,
      "learning_rate": 8.81221255505724e-06,
      "loss": 0.362,
      "step": 1508
    },
    {
      "epoch": 0.1818,
      "grad_norm": 0.5677952766418457,
      "learning_rate": 8.80995297483286e-06,
      "loss": 0.3348,
      "step": 1509
    },
    {
      "epoch": 0.182,
      "grad_norm": 0.5279467701911926,
      "learning_rate": 8.807691537683685e-06,
      "loss": 0.3522,
      "step": 1510
    },
    {
      "epoch": 0.1822,
      "grad_norm": 0.5581727027893066,
      "learning_rate": 8.80542824471191e-06,
      "loss": 0.3192,
      "step": 1511
    },
    {
      "epoch": 0.1824,
      "grad_norm": 0.4632508456707001,
      "learning_rate": 8.803163097020637e-06,
      "loss": 0.3145,
      "step": 1512
    },
    {
      "epoch": 0.1826,
      "grad_norm": 0.6467320919036865,
      "learning_rate": 8.80089609571387e-06,
      "loss": 0.3367,
      "step": 1513
    },
    {
      "epoch": 0.1828,
      "grad_norm": 0.4820963144302368,
      "learning_rate": 8.798627241896524e-06,
      "loss": 0.3282,
      "step": 1514
    },
    {
      "epoch": 0.183,
      "grad_norm": 0.39120522141456604,
      "learning_rate": 8.796356536674404e-06,
      "loss": 0.3468,
      "step": 1515
    },
    {
      "epoch": 0.1832,
      "grad_norm": 0.5784851908683777,
      "learning_rate": 8.794083981154229e-06,
      "loss": 0.3431,
      "step": 1516
    },
    {
      "epoch": 0.1834,
      "grad_norm": 0.4888552129268646,
      "learning_rate": 8.791809576443611e-06,
      "loss": 0.3318,
      "step": 1517
    },
    {
      "epoch": 0.1836,
      "grad_norm": 0.4326871633529663,
      "learning_rate": 8.789533323651067e-06,
      "loss": 0.3387,
      "step": 1518
    },
    {
      "epoch": 0.1838,
      "grad_norm": 0.4484288990497589,
      "learning_rate": 8.78725522388602e-06,
      "loss": 0.3515,
      "step": 1519
    },
    {
      "epoch": 0.184,
      "grad_norm": 0.6663259863853455,
      "learning_rate": 8.784975278258783e-06,
      "loss": 0.3556,
      "step": 1520
    },
    {
      "epoch": 0.1842,
      "grad_norm": 0.39237311482429504,
      "learning_rate": 8.782693487880575e-06,
      "loss": 0.349,
      "step": 1521
    },
    {
      "epoch": 0.1844,
      "grad_norm": 0.506321370601654,
      "learning_rate": 8.780409853863517e-06,
      "loss": 0.3413,
      "step": 1522
    },
    {
      "epoch": 0.1846,
      "grad_norm": 0.46105748414993286,
      "learning_rate": 8.778124377320619e-06,
      "loss": 0.3302,
      "step": 1523
    },
    {
      "epoch": 0.1848,
      "grad_norm": 0.6444749236106873,
      "learning_rate": 8.775837059365796e-06,
      "loss": 0.3392,
      "step": 1524
    },
    {
      "epoch": 0.185,
      "grad_norm": 0.5113400816917419,
      "learning_rate": 8.773547901113862e-06,
      "loss": 0.3468,
      "step": 1525
    },
    {
      "epoch": 0.1852,
      "grad_norm": 0.45260587334632874,
      "learning_rate": 8.77125690368052e-06,
      "loss": 0.3352,
      "step": 1526
    },
    {
      "epoch": 0.1854,
      "grad_norm": 0.5071515440940857,
      "learning_rate": 8.768964068182378e-06,
      "loss": 0.3707,
      "step": 1527
    },
    {
      "epoch": 0.1856,
      "grad_norm": 0.5277918577194214,
      "learning_rate": 8.766669395736936e-06,
      "loss": 0.343,
      "step": 1528
    },
    {
      "epoch": 0.1858,
      "grad_norm": 0.45846760272979736,
      "learning_rate": 8.764372887462587e-06,
      "loss": 0.3785,
      "step": 1529
    },
    {
      "epoch": 0.186,
      "grad_norm": 0.4372290372848511,
      "learning_rate": 8.762074544478622e-06,
      "loss": 0.3255,
      "step": 1530
    },
    {
      "epoch": 0.1862,
      "grad_norm": 0.6501349210739136,
      "learning_rate": 8.759774367905228e-06,
      "loss": 0.3318,
      "step": 1531
    },
    {
      "epoch": 0.1864,
      "grad_norm": 0.8483263254165649,
      "learning_rate": 8.757472358863481e-06,
      "loss": 0.3424,
      "step": 1532
    },
    {
      "epoch": 0.1866,
      "grad_norm": 0.46909716725349426,
      "learning_rate": 8.755168518475351e-06,
      "loss": 0.3392,
      "step": 1533
    },
    {
      "epoch": 0.1868,
      "grad_norm": 0.7201991081237793,
      "learning_rate": 8.752862847863707e-06,
      "loss": 0.3448,
      "step": 1534
    },
    {
      "epoch": 0.187,
      "grad_norm": 1.540327787399292,
      "learning_rate": 8.750555348152299e-06,
      "loss": 0.3344,
      "step": 1535
    },
    {
      "epoch": 0.1872,
      "grad_norm": 0.7253796458244324,
      "learning_rate": 8.748246020465776e-06,
      "loss": 0.3397,
      "step": 1536
    },
    {
      "epoch": 0.1874,
      "grad_norm": 0.5424474477767944,
      "learning_rate": 8.745934865929676e-06,
      "loss": 0.36,
      "step": 1537
    },
    {
      "epoch": 0.1876,
      "grad_norm": 0.5366278886795044,
      "learning_rate": 8.743621885670431e-06,
      "loss": 0.3563,
      "step": 1538
    },
    {
      "epoch": 0.1878,
      "grad_norm": 0.4850046932697296,
      "learning_rate": 8.741307080815357e-06,
      "loss": 0.3523,
      "step": 1539
    },
    {
      "epoch": 0.188,
      "grad_norm": 0.4972448945045471,
      "learning_rate": 8.73899045249266e-06,
      "loss": 0.3679,
      "step": 1540
    },
    {
      "epoch": 0.1882,
      "grad_norm": 0.4355808198451996,
      "learning_rate": 8.736672001831438e-06,
      "loss": 0.3042,
      "step": 1541
    },
    {
      "epoch": 0.1884,
      "grad_norm": 0.5281969904899597,
      "learning_rate": 8.73435172996168e-06,
      "loss": 0.3562,
      "step": 1542
    },
    {
      "epoch": 0.1886,
      "grad_norm": 0.5372441411018372,
      "learning_rate": 8.732029638014249e-06,
      "loss": 0.3636,
      "step": 1543
    },
    {
      "epoch": 0.1888,
      "grad_norm": 0.4205263555049896,
      "learning_rate": 8.729705727120911e-06,
      "loss": 0.3334,
      "step": 1544
    },
    {
      "epoch": 0.189,
      "grad_norm": 0.523960530757904,
      "learning_rate": 8.727379998414311e-06,
      "loss": 0.3413,
      "step": 1545
    },
    {
      "epoch": 0.1892,
      "grad_norm": 0.5252153873443604,
      "learning_rate": 8.725052453027982e-06,
      "loss": 0.3719,
      "step": 1546
    },
    {
      "epoch": 0.1894,
      "grad_norm": 0.5083900094032288,
      "learning_rate": 8.722723092096337e-06,
      "loss": 0.3415,
      "step": 1547
    },
    {
      "epoch": 0.1896,
      "grad_norm": 0.49753403663635254,
      "learning_rate": 8.720391916754683e-06,
      "loss": 0.3475,
      "step": 1548
    },
    {
      "epoch": 0.1898,
      "grad_norm": 0.4648216962814331,
      "learning_rate": 8.718058928139205e-06,
      "loss": 0.3648,
      "step": 1549
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.43836188316345215,
      "learning_rate": 8.715724127386971e-06,
      "loss": 0.3519,
      "step": 1550
    },
    {
      "epoch": 0.1902,
      "grad_norm": 0.45395219326019287,
      "learning_rate": 8.713387515635938e-06,
      "loss": 0.3446,
      "step": 1551
    },
    {
      "epoch": 0.1904,
      "grad_norm": 0.4841509759426117,
      "learning_rate": 8.711049094024942e-06,
      "loss": 0.32,
      "step": 1552
    },
    {
      "epoch": 0.1906,
      "grad_norm": 0.49055275321006775,
      "learning_rate": 8.708708863693696e-06,
      "loss": 0.3503,
      "step": 1553
    },
    {
      "epoch": 0.1908,
      "grad_norm": 0.43740609288215637,
      "learning_rate": 8.706366825782805e-06,
      "loss": 0.3666,
      "step": 1554
    },
    {
      "epoch": 0.191,
      "grad_norm": 0.5273027420043945,
      "learning_rate": 8.70402298143375e-06,
      "loss": 0.3404,
      "step": 1555
    },
    {
      "epoch": 0.1912,
      "grad_norm": 0.4332776963710785,
      "learning_rate": 8.701677331788891e-06,
      "loss": 0.3209,
      "step": 1556
    },
    {
      "epoch": 0.1914,
      "grad_norm": 0.5258138179779053,
      "learning_rate": 8.699329877991469e-06,
      "loss": 0.3531,
      "step": 1557
    },
    {
      "epoch": 0.1916,
      "grad_norm": 0.7787679433822632,
      "learning_rate": 8.696980621185602e-06,
      "loss": 0.3276,
      "step": 1558
    },
    {
      "epoch": 0.1918,
      "grad_norm": 0.43223655223846436,
      "learning_rate": 8.694629562516295e-06,
      "loss": 0.3468,
      "step": 1559
    },
    {
      "epoch": 0.192,
      "grad_norm": 1.7166944742202759,
      "learning_rate": 8.692276703129421e-06,
      "loss": 0.3793,
      "step": 1560
    },
    {
      "epoch": 0.1922,
      "grad_norm": 0.5654975771903992,
      "learning_rate": 8.689922044171735e-06,
      "loss": 0.3517,
      "step": 1561
    },
    {
      "epoch": 0.1924,
      "grad_norm": 0.6576845645904541,
      "learning_rate": 8.68756558679087e-06,
      "loss": 0.3621,
      "step": 1562
    },
    {
      "epoch": 0.1926,
      "grad_norm": 0.7618386745452881,
      "learning_rate": 8.685207332135337e-06,
      "loss": 0.358,
      "step": 1563
    },
    {
      "epoch": 0.1928,
      "grad_norm": 0.47903573513031006,
      "learning_rate": 8.682847281354517e-06,
      "loss": 0.3565,
      "step": 1564
    },
    {
      "epoch": 0.193,
      "grad_norm": 0.564659595489502,
      "learning_rate": 8.680485435598674e-06,
      "loss": 0.3644,
      "step": 1565
    },
    {
      "epoch": 0.1932,
      "grad_norm": 0.46219170093536377,
      "learning_rate": 8.678121796018938e-06,
      "loss": 0.3447,
      "step": 1566
    },
    {
      "epoch": 0.1934,
      "grad_norm": 0.5004449486732483,
      "learning_rate": 8.675756363767322e-06,
      "loss": 0.3921,
      "step": 1567
    },
    {
      "epoch": 0.1936,
      "grad_norm": 0.4968661367893219,
      "learning_rate": 8.673389139996708e-06,
      "loss": 0.3316,
      "step": 1568
    },
    {
      "epoch": 0.1938,
      "grad_norm": 0.5901411175727844,
      "learning_rate": 8.671020125860851e-06,
      "loss": 0.36,
      "step": 1569
    },
    {
      "epoch": 0.194,
      "grad_norm": 0.49066323041915894,
      "learning_rate": 8.668649322514382e-06,
      "loss": 0.3671,
      "step": 1570
    },
    {
      "epoch": 0.1942,
      "grad_norm": 0.9031388163566589,
      "learning_rate": 8.666276731112802e-06,
      "loss": 0.3601,
      "step": 1571
    },
    {
      "epoch": 0.1944,
      "grad_norm": 0.616798996925354,
      "learning_rate": 8.66390235281248e-06,
      "loss": 0.4032,
      "step": 1572
    },
    {
      "epoch": 0.1946,
      "grad_norm": 0.4659276306629181,
      "learning_rate": 8.66152618877066e-06,
      "loss": 0.3546,
      "step": 1573
    },
    {
      "epoch": 0.1948,
      "grad_norm": 0.5001726150512695,
      "learning_rate": 8.659148240145456e-06,
      "loss": 0.3374,
      "step": 1574
    },
    {
      "epoch": 0.195,
      "grad_norm": 0.4472518563270569,
      "learning_rate": 8.656768508095853e-06,
      "loss": 0.3387,
      "step": 1575
    },
    {
      "epoch": 0.1952,
      "grad_norm": 0.5096551179885864,
      "learning_rate": 8.654386993781703e-06,
      "loss": 0.3745,
      "step": 1576
    },
    {
      "epoch": 0.1954,
      "grad_norm": 0.456449031829834,
      "learning_rate": 8.652003698363724e-06,
      "loss": 0.3751,
      "step": 1577
    },
    {
      "epoch": 0.1956,
      "grad_norm": 0.5474330186843872,
      "learning_rate": 8.649618623003509e-06,
      "loss": 0.3339,
      "step": 1578
    },
    {
      "epoch": 0.1958,
      "grad_norm": 0.49623602628707886,
      "learning_rate": 8.647231768863513e-06,
      "loss": 0.3608,
      "step": 1579
    },
    {
      "epoch": 0.196,
      "grad_norm": 0.5748809576034546,
      "learning_rate": 8.644843137107058e-06,
      "loss": 0.3469,
      "step": 1580
    },
    {
      "epoch": 0.1962,
      "grad_norm": 0.4286330044269562,
      "learning_rate": 8.642452728898339e-06,
      "loss": 0.3516,
      "step": 1581
    },
    {
      "epoch": 0.1964,
      "grad_norm": 0.46741199493408203,
      "learning_rate": 8.640060545402407e-06,
      "loss": 0.3484,
      "step": 1582
    },
    {
      "epoch": 0.1966,
      "grad_norm": 0.4238349199295044,
      "learning_rate": 8.637666587785185e-06,
      "loss": 0.3109,
      "step": 1583
    },
    {
      "epoch": 0.1968,
      "grad_norm": 0.4741043746471405,
      "learning_rate": 8.63527085721346e-06,
      "loss": 0.3243,
      "step": 1584
    },
    {
      "epoch": 0.197,
      "grad_norm": 1.2729978561401367,
      "learning_rate": 8.632873354854881e-06,
      "loss": 0.3752,
      "step": 1585
    },
    {
      "epoch": 0.1972,
      "grad_norm": 0.42844104766845703,
      "learning_rate": 8.630474081877959e-06,
      "loss": 0.3819,
      "step": 1586
    },
    {
      "epoch": 0.1974,
      "grad_norm": 0.5586843490600586,
      "learning_rate": 8.628073039452076e-06,
      "loss": 0.3551,
      "step": 1587
    },
    {
      "epoch": 0.1976,
      "grad_norm": 0.6659725308418274,
      "learning_rate": 8.625670228747467e-06,
      "loss": 0.3746,
      "step": 1588
    },
    {
      "epoch": 0.1978,
      "grad_norm": 0.6310418844223022,
      "learning_rate": 8.623265650935233e-06,
      "loss": 0.3288,
      "step": 1589
    },
    {
      "epoch": 0.198,
      "grad_norm": 0.5414645075798035,
      "learning_rate": 8.620859307187339e-06,
      "loss": 0.3102,
      "step": 1590
    },
    {
      "epoch": 0.1982,
      "grad_norm": 0.38679689168930054,
      "learning_rate": 8.618451198676602e-06,
      "loss": 0.3087,
      "step": 1591
    },
    {
      "epoch": 0.1984,
      "grad_norm": 0.6964982748031616,
      "learning_rate": 8.616041326576711e-06,
      "loss": 0.3586,
      "step": 1592
    },
    {
      "epoch": 0.1986,
      "grad_norm": 0.4741411507129669,
      "learning_rate": 8.613629692062204e-06,
      "loss": 0.3353,
      "step": 1593
    },
    {
      "epoch": 0.1988,
      "grad_norm": 0.5389928817749023,
      "learning_rate": 8.611216296308485e-06,
      "loss": 0.3488,
      "step": 1594
    },
    {
      "epoch": 0.199,
      "grad_norm": 0.9280518293380737,
      "learning_rate": 8.608801140491811e-06,
      "loss": 0.3491,
      "step": 1595
    },
    {
      "epoch": 0.1992,
      "grad_norm": 0.5060313940048218,
      "learning_rate": 8.606384225789304e-06,
      "loss": 0.3708,
      "step": 1596
    },
    {
      "epoch": 0.1994,
      "grad_norm": 0.519513726234436,
      "learning_rate": 8.603965553378934e-06,
      "loss": 0.3316,
      "step": 1597
    },
    {
      "epoch": 0.1996,
      "grad_norm": 0.46185046434402466,
      "learning_rate": 8.601545124439535e-06,
      "loss": 0.3189,
      "step": 1598
    },
    {
      "epoch": 0.1998,
      "grad_norm": 0.43349915742874146,
      "learning_rate": 8.599122940150795e-06,
      "loss": 0.323,
      "step": 1599
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5046770572662354,
      "learning_rate": 8.596699001693257e-06,
      "loss": 0.3522,
      "step": 1600
    },
    {
      "epoch": 0.2002,
      "grad_norm": 0.47522425651550293,
      "learning_rate": 8.594273310248317e-06,
      "loss": 0.3432,
      "step": 1601
    },
    {
      "epoch": 0.2004,
      "grad_norm": 0.5936286449432373,
      "learning_rate": 8.591845866998231e-06,
      "loss": 0.3697,
      "step": 1602
    },
    {
      "epoch": 0.2006,
      "grad_norm": 0.5084104537963867,
      "learning_rate": 8.589416673126104e-06,
      "loss": 0.3422,
      "step": 1603
    },
    {
      "epoch": 0.2008,
      "grad_norm": 0.627119243144989,
      "learning_rate": 8.586985729815895e-06,
      "loss": 0.3601,
      "step": 1604
    },
    {
      "epoch": 0.201,
      "grad_norm": 0.5078595876693726,
      "learning_rate": 8.584553038252415e-06,
      "loss": 0.3255,
      "step": 1605
    },
    {
      "epoch": 0.2012,
      "grad_norm": 0.5894535183906555,
      "learning_rate": 8.58211859962133e-06,
      "loss": 0.3431,
      "step": 1606
    },
    {
      "epoch": 0.2014,
      "grad_norm": 0.47840118408203125,
      "learning_rate": 8.579682415109156e-06,
      "loss": 0.3322,
      "step": 1607
    },
    {
      "epoch": 0.2016,
      "grad_norm": 0.49606576561927795,
      "learning_rate": 8.57724448590326e-06,
      "loss": 0.3194,
      "step": 1608
    },
    {
      "epoch": 0.2018,
      "grad_norm": 0.4647481143474579,
      "learning_rate": 8.574804813191859e-06,
      "loss": 0.3517,
      "step": 1609
    },
    {
      "epoch": 0.202,
      "grad_norm": 0.6018632054328918,
      "learning_rate": 8.572363398164017e-06,
      "loss": 0.3518,
      "step": 1610
    },
    {
      "epoch": 0.2022,
      "grad_norm": 0.4947096109390259,
      "learning_rate": 8.569920242009655e-06,
      "loss": 0.3883,
      "step": 1611
    },
    {
      "epoch": 0.2024,
      "grad_norm": 0.5888226628303528,
      "learning_rate": 8.567475345919532e-06,
      "loss": 0.3763,
      "step": 1612
    },
    {
      "epoch": 0.2026,
      "grad_norm": 0.5231422185897827,
      "learning_rate": 8.565028711085266e-06,
      "loss": 0.3496,
      "step": 1613
    },
    {
      "epoch": 0.2028,
      "grad_norm": 0.5471017956733704,
      "learning_rate": 8.562580338699313e-06,
      "loss": 0.316,
      "step": 1614
    },
    {
      "epoch": 0.203,
      "grad_norm": 0.4260873794555664,
      "learning_rate": 8.560130229954985e-06,
      "loss": 0.3538,
      "step": 1615
    },
    {
      "epoch": 0.2032,
      "grad_norm": 0.4958387017250061,
      "learning_rate": 8.557678386046429e-06,
      "loss": 0.3204,
      "step": 1616
    },
    {
      "epoch": 0.2034,
      "grad_norm": 0.5726692080497742,
      "learning_rate": 8.555224808168644e-06,
      "loss": 0.3344,
      "step": 1617
    },
    {
      "epoch": 0.2036,
      "grad_norm": 0.4626884162425995,
      "learning_rate": 8.55276949751748e-06,
      "loss": 0.3369,
      "step": 1618
    },
    {
      "epoch": 0.2038,
      "grad_norm": 0.4694746136665344,
      "learning_rate": 8.550312455289624e-06,
      "loss": 0.3627,
      "step": 1619
    },
    {
      "epoch": 0.204,
      "grad_norm": 0.40748921036720276,
      "learning_rate": 8.547853682682605e-06,
      "loss": 0.3619,
      "step": 1620
    },
    {
      "epoch": 0.2042,
      "grad_norm": 0.7755879163742065,
      "learning_rate": 8.545393180894801e-06,
      "loss": 0.3328,
      "step": 1621
    },
    {
      "epoch": 0.2044,
      "grad_norm": 0.5510193109512329,
      "learning_rate": 8.542930951125432e-06,
      "loss": 0.3532,
      "step": 1622
    },
    {
      "epoch": 0.2046,
      "grad_norm": 0.4839576780796051,
      "learning_rate": 8.540466994574556e-06,
      "loss": 0.3295,
      "step": 1623
    },
    {
      "epoch": 0.2048,
      "grad_norm": 0.6029901504516602,
      "learning_rate": 8.538001312443078e-06,
      "loss": 0.3592,
      "step": 1624
    },
    {
      "epoch": 0.205,
      "grad_norm": 0.4617214500904083,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.3287,
      "step": 1625
    },
    {
      "epoch": 0.2052,
      "grad_norm": 0.5386726260185242,
      "learning_rate": 8.533064776246126e-06,
      "loss": 0.3563,
      "step": 1626
    },
    {
      "epoch": 0.2054,
      "grad_norm": 0.5069786310195923,
      "learning_rate": 8.530593924586659e-06,
      "loss": 0.3312,
      "step": 1627
    },
    {
      "epoch": 0.2056,
      "grad_norm": 0.4504518210887909,
      "learning_rate": 8.528121352158604e-06,
      "loss": 0.3372,
      "step": 1628
    },
    {
      "epoch": 0.2058,
      "grad_norm": 0.573756992816925,
      "learning_rate": 8.525647060167063e-06,
      "loss": 0.3804,
      "step": 1629
    },
    {
      "epoch": 0.206,
      "grad_norm": 0.5095866918563843,
      "learning_rate": 8.523171049817974e-06,
      "loss": 0.3662,
      "step": 1630
    },
    {
      "epoch": 0.2062,
      "grad_norm": 0.4881170690059662,
      "learning_rate": 8.520693322318116e-06,
      "loss": 0.3758,
      "step": 1631
    },
    {
      "epoch": 0.2064,
      "grad_norm": 0.4154106378555298,
      "learning_rate": 8.518213878875103e-06,
      "loss": 0.3369,
      "step": 1632
    },
    {
      "epoch": 0.2066,
      "grad_norm": 0.3967030346393585,
      "learning_rate": 8.515732720697383e-06,
      "loss": 0.324,
      "step": 1633
    },
    {
      "epoch": 0.2068,
      "grad_norm": 0.43523868918418884,
      "learning_rate": 8.513249848994248e-06,
      "loss": 0.3495,
      "step": 1634
    },
    {
      "epoch": 0.207,
      "grad_norm": 0.5306499600410461,
      "learning_rate": 8.510765264975813e-06,
      "loss": 0.3378,
      "step": 1635
    },
    {
      "epoch": 0.2072,
      "grad_norm": 0.5891044735908508,
      "learning_rate": 8.508278969853037e-06,
      "loss": 0.3281,
      "step": 1636
    },
    {
      "epoch": 0.2074,
      "grad_norm": 0.5105694532394409,
      "learning_rate": 8.505790964837712e-06,
      "loss": 0.4038,
      "step": 1637
    },
    {
      "epoch": 0.2076,
      "grad_norm": 0.4806811213493347,
      "learning_rate": 8.50330125114246e-06,
      "loss": 0.3825,
      "step": 1638
    },
    {
      "epoch": 0.2078,
      "grad_norm": 0.4279062747955322,
      "learning_rate": 8.500809829980734e-06,
      "loss": 0.3481,
      "step": 1639
    },
    {
      "epoch": 0.208,
      "grad_norm": 0.4385508596897125,
      "learning_rate": 8.498316702566828e-06,
      "loss": 0.3311,
      "step": 1640
    },
    {
      "epoch": 0.2082,
      "grad_norm": 0.3885791301727295,
      "learning_rate": 8.495821870115857e-06,
      "loss": 0.3177,
      "step": 1641
    },
    {
      "epoch": 0.2084,
      "grad_norm": 0.5464277863502502,
      "learning_rate": 8.493325333843776e-06,
      "loss": 0.3504,
      "step": 1642
    },
    {
      "epoch": 0.2086,
      "grad_norm": 1.0653138160705566,
      "learning_rate": 8.490827094967364e-06,
      "loss": 0.3588,
      "step": 1643
    },
    {
      "epoch": 0.2088,
      "grad_norm": 0.4940497577190399,
      "learning_rate": 8.488327154704232e-06,
      "loss": 0.3715,
      "step": 1644
    },
    {
      "epoch": 0.209,
      "grad_norm": 0.5108953714370728,
      "learning_rate": 8.485825514272824e-06,
      "loss": 0.3283,
      "step": 1645
    },
    {
      "epoch": 0.2092,
      "grad_norm": 0.4458819329738617,
      "learning_rate": 8.483322174892404e-06,
      "loss": 0.3439,
      "step": 1646
    },
    {
      "epoch": 0.2094,
      "grad_norm": 0.4627915620803833,
      "learning_rate": 8.480817137783073e-06,
      "loss": 0.3575,
      "step": 1647
    },
    {
      "epoch": 0.2096,
      "grad_norm": 0.45368871092796326,
      "learning_rate": 8.478310404165756e-06,
      "loss": 0.3547,
      "step": 1648
    },
    {
      "epoch": 0.2098,
      "grad_norm": 0.5801319479942322,
      "learning_rate": 8.4758019752622e-06,
      "loss": 0.3621,
      "step": 1649
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.49640288949012756,
      "learning_rate": 8.473291852294986e-06,
      "loss": 0.3549,
      "step": 1650
    },
    {
      "epoch": 0.2102,
      "grad_norm": 0.5270460844039917,
      "learning_rate": 8.47078003648752e-06,
      "loss": 0.3267,
      "step": 1651
    },
    {
      "epoch": 0.2104,
      "grad_norm": 0.5748618245124817,
      "learning_rate": 8.468266529064025e-06,
      "loss": 0.3182,
      "step": 1652
    },
    {
      "epoch": 0.2106,
      "grad_norm": 0.4282214045524597,
      "learning_rate": 8.465751331249558e-06,
      "loss": 0.3444,
      "step": 1653
    },
    {
      "epoch": 0.2108,
      "grad_norm": 0.5308535099029541,
      "learning_rate": 8.463234444269994e-06,
      "loss": 0.3739,
      "step": 1654
    },
    {
      "epoch": 0.211,
      "grad_norm": 0.3920656442642212,
      "learning_rate": 8.460715869352035e-06,
      "loss": 0.3159,
      "step": 1655
    },
    {
      "epoch": 0.2112,
      "grad_norm": 0.5572400093078613,
      "learning_rate": 8.458195607723201e-06,
      "loss": 0.3827,
      "step": 1656
    },
    {
      "epoch": 0.2114,
      "grad_norm": 0.6678735017776489,
      "learning_rate": 8.45567366061184e-06,
      "loss": 0.3405,
      "step": 1657
    },
    {
      "epoch": 0.2116,
      "grad_norm": 0.5648146271705627,
      "learning_rate": 8.453150029247115e-06,
      "loss": 0.3331,
      "step": 1658
    },
    {
      "epoch": 0.2118,
      "grad_norm": 0.4796130955219269,
      "learning_rate": 8.450624714859016e-06,
      "loss": 0.3306,
      "step": 1659
    },
    {
      "epoch": 0.212,
      "grad_norm": 0.45960214734077454,
      "learning_rate": 8.44809771867835e-06,
      "loss": 0.3249,
      "step": 1660
    },
    {
      "epoch": 0.2122,
      "grad_norm": 0.4859941303730011,
      "learning_rate": 8.445569041936743e-06,
      "loss": 0.349,
      "step": 1661
    },
    {
      "epoch": 0.2124,
      "grad_norm": 0.4080614745616913,
      "learning_rate": 8.443038685866643e-06,
      "loss": 0.3227,
      "step": 1662
    },
    {
      "epoch": 0.2126,
      "grad_norm": 0.5422409176826477,
      "learning_rate": 8.440506651701315e-06,
      "loss": 0.3669,
      "step": 1663
    },
    {
      "epoch": 0.2128,
      "grad_norm": 0.5250568389892578,
      "learning_rate": 8.437972940674838e-06,
      "loss": 0.329,
      "step": 1664
    },
    {
      "epoch": 0.213,
      "grad_norm": 0.49791377782821655,
      "learning_rate": 8.435437554022116e-06,
      "loss": 0.3433,
      "step": 1665
    },
    {
      "epoch": 0.2132,
      "grad_norm": 0.46748220920562744,
      "learning_rate": 8.432900492978864e-06,
      "loss": 0.3508,
      "step": 1666
    },
    {
      "epoch": 0.2134,
      "grad_norm": 0.43859753012657166,
      "learning_rate": 8.430361758781616e-06,
      "loss": 0.3482,
      "step": 1667
    },
    {
      "epoch": 0.2136,
      "grad_norm": 0.9417821168899536,
      "learning_rate": 8.427821352667719e-06,
      "loss": 0.3461,
      "step": 1668
    },
    {
      "epoch": 0.2138,
      "grad_norm": 0.4269940257072449,
      "learning_rate": 8.425279275875336e-06,
      "loss": 0.3093,
      "step": 1669
    },
    {
      "epoch": 0.214,
      "grad_norm": 0.4919665455818176,
      "learning_rate": 8.422735529643445e-06,
      "loss": 0.3416,
      "step": 1670
    },
    {
      "epoch": 0.2142,
      "grad_norm": 0.5763488411903381,
      "learning_rate": 8.420190115211835e-06,
      "loss": 0.3656,
      "step": 1671
    },
    {
      "epoch": 0.2144,
      "grad_norm": 0.5865827798843384,
      "learning_rate": 8.417643033821114e-06,
      "loss": 0.3378,
      "step": 1672
    },
    {
      "epoch": 0.2146,
      "grad_norm": 0.4984340965747833,
      "learning_rate": 8.415094286712694e-06,
      "loss": 0.359,
      "step": 1673
    },
    {
      "epoch": 0.2148,
      "grad_norm": 0.4930255115032196,
      "learning_rate": 8.412543875128809e-06,
      "loss": 0.3521,
      "step": 1674
    },
    {
      "epoch": 0.215,
      "grad_norm": 0.44493401050567627,
      "learning_rate": 8.409991800312493e-06,
      "loss": 0.3102,
      "step": 1675
    },
    {
      "epoch": 0.2152,
      "grad_norm": 0.5498883724212646,
      "learning_rate": 8.4074380635076e-06,
      "loss": 0.3308,
      "step": 1676
    },
    {
      "epoch": 0.2154,
      "grad_norm": 0.6141671538352966,
      "learning_rate": 8.404882665958788e-06,
      "loss": 0.3822,
      "step": 1677
    },
    {
      "epoch": 0.2156,
      "grad_norm": 0.6259552240371704,
      "learning_rate": 8.402325608911527e-06,
      "loss": 0.376,
      "step": 1678
    },
    {
      "epoch": 0.2158,
      "grad_norm": 0.4902631640434265,
      "learning_rate": 8.399766893612096e-06,
      "loss": 0.3732,
      "step": 1679
    },
    {
      "epoch": 0.216,
      "grad_norm": 0.460875928401947,
      "learning_rate": 8.397206521307584e-06,
      "loss": 0.3485,
      "step": 1680
    },
    {
      "epoch": 0.2162,
      "grad_norm": 0.4597313404083252,
      "learning_rate": 8.394644493245882e-06,
      "loss": 0.339,
      "step": 1681
    },
    {
      "epoch": 0.2164,
      "grad_norm": 0.5307125449180603,
      "learning_rate": 8.392080810675692e-06,
      "loss": 0.3809,
      "step": 1682
    },
    {
      "epoch": 0.2166,
      "grad_norm": 0.4510352611541748,
      "learning_rate": 8.389515474846522e-06,
      "loss": 0.3661,
      "step": 1683
    },
    {
      "epoch": 0.2168,
      "grad_norm": 0.5440855026245117,
      "learning_rate": 8.386948487008687e-06,
      "loss": 0.3297,
      "step": 1684
    },
    {
      "epoch": 0.217,
      "grad_norm": 0.5890617966651917,
      "learning_rate": 8.384379848413304e-06,
      "loss": 0.347,
      "step": 1685
    },
    {
      "epoch": 0.2172,
      "grad_norm": 0.4329816699028015,
      "learning_rate": 8.381809560312298e-06,
      "loss": 0.353,
      "step": 1686
    },
    {
      "epoch": 0.2174,
      "grad_norm": 0.451326847076416,
      "learning_rate": 8.379237623958393e-06,
      "loss": 0.3653,
      "step": 1687
    },
    {
      "epoch": 0.2176,
      "grad_norm": 0.6831097602844238,
      "learning_rate": 8.376664040605122e-06,
      "loss": 0.3386,
      "step": 1688
    },
    {
      "epoch": 0.2178,
      "grad_norm": 0.4325626790523529,
      "learning_rate": 8.374088811506819e-06,
      "loss": 0.3275,
      "step": 1689
    },
    {
      "epoch": 0.218,
      "grad_norm": 0.5566141605377197,
      "learning_rate": 8.371511937918616e-06,
      "loss": 0.3403,
      "step": 1690
    },
    {
      "epoch": 0.2182,
      "grad_norm": 0.5806346535682678,
      "learning_rate": 8.368933421096454e-06,
      "loss": 0.3353,
      "step": 1691
    },
    {
      "epoch": 0.2184,
      "grad_norm": 0.534765362739563,
      "learning_rate": 8.366353262297069e-06,
      "loss": 0.3634,
      "step": 1692
    },
    {
      "epoch": 0.2186,
      "grad_norm": 1.068870186805725,
      "learning_rate": 8.363771462778e-06,
      "loss": 0.3675,
      "step": 1693
    },
    {
      "epoch": 0.2188,
      "grad_norm": 0.49633491039276123,
      "learning_rate": 8.361188023797581e-06,
      "loss": 0.3554,
      "step": 1694
    },
    {
      "epoch": 0.219,
      "grad_norm": 0.4688683748245239,
      "learning_rate": 8.358602946614952e-06,
      "loss": 0.3291,
      "step": 1695
    },
    {
      "epoch": 0.2192,
      "grad_norm": 0.462571382522583,
      "learning_rate": 8.356016232490047e-06,
      "loss": 0.3342,
      "step": 1696
    },
    {
      "epoch": 0.2194,
      "grad_norm": 0.5672478675842285,
      "learning_rate": 8.353427882683601e-06,
      "loss": 0.3397,
      "step": 1697
    },
    {
      "epoch": 0.2196,
      "grad_norm": 0.4890166223049164,
      "learning_rate": 8.350837898457142e-06,
      "loss": 0.3099,
      "step": 1698
    },
    {
      "epoch": 0.2198,
      "grad_norm": 0.5141677856445312,
      "learning_rate": 8.348246281072998e-06,
      "loss": 0.3381,
      "step": 1699
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5200169682502747,
      "learning_rate": 8.345653031794292e-06,
      "loss": 0.3426,
      "step": 1700
    },
    {
      "epoch": 0.2202,
      "grad_norm": 0.5149275660514832,
      "learning_rate": 8.343058151884942e-06,
      "loss": 0.3343,
      "step": 1701
    },
    {
      "epoch": 0.2204,
      "grad_norm": 0.4673578441143036,
      "learning_rate": 8.34046164260966e-06,
      "loss": 0.3858,
      "step": 1702
    },
    {
      "epoch": 0.2206,
      "grad_norm": 0.5561712980270386,
      "learning_rate": 8.337863505233954e-06,
      "loss": 0.3484,
      "step": 1703
    },
    {
      "epoch": 0.2208,
      "grad_norm": 0.47411850094795227,
      "learning_rate": 8.335263741024123e-06,
      "loss": 0.3102,
      "step": 1704
    },
    {
      "epoch": 0.221,
      "grad_norm": 0.41681548953056335,
      "learning_rate": 8.332662351247262e-06,
      "loss": 0.3181,
      "step": 1705
    },
    {
      "epoch": 0.2212,
      "grad_norm": 0.5630611777305603,
      "learning_rate": 8.33005933717126e-06,
      "loss": 0.3698,
      "step": 1706
    },
    {
      "epoch": 0.2214,
      "grad_norm": 0.45684170722961426,
      "learning_rate": 8.327454700064788e-06,
      "loss": 0.3492,
      "step": 1707
    },
    {
      "epoch": 0.2216,
      "grad_norm": 0.5563671588897705,
      "learning_rate": 8.324848441197317e-06,
      "loss": 0.3497,
      "step": 1708
    },
    {
      "epoch": 0.2218,
      "grad_norm": 0.4692704677581787,
      "learning_rate": 8.32224056183911e-06,
      "loss": 0.3286,
      "step": 1709
    },
    {
      "epoch": 0.222,
      "grad_norm": 0.5039827227592468,
      "learning_rate": 8.319631063261209e-06,
      "loss": 0.3336,
      "step": 1710
    },
    {
      "epoch": 0.2222,
      "grad_norm": 0.5676580667495728,
      "learning_rate": 8.317019946735456e-06,
      "loss": 0.3506,
      "step": 1711
    },
    {
      "epoch": 0.2224,
      "grad_norm": 0.6083831787109375,
      "learning_rate": 8.314407213534477e-06,
      "loss": 0.3656,
      "step": 1712
    },
    {
      "epoch": 0.2226,
      "grad_norm": 0.4888567626476288,
      "learning_rate": 8.311792864931686e-06,
      "loss": 0.3175,
      "step": 1713
    },
    {
      "epoch": 0.2228,
      "grad_norm": 0.5748130679130554,
      "learning_rate": 8.309176902201283e-06,
      "loss": 0.3729,
      "step": 1714
    },
    {
      "epoch": 0.223,
      "grad_norm": 0.505332887172699,
      "learning_rate": 8.30655932661826e-06,
      "loss": 0.3503,
      "step": 1715
    },
    {
      "epoch": 0.2232,
      "grad_norm": 0.49530330300331116,
      "learning_rate": 8.303940139458389e-06,
      "loss": 0.3136,
      "step": 1716
    },
    {
      "epoch": 0.2234,
      "grad_norm": 0.5812203288078308,
      "learning_rate": 8.301319341998231e-06,
      "loss": 0.3676,
      "step": 1717
    },
    {
      "epoch": 0.2236,
      "grad_norm": 0.7566093802452087,
      "learning_rate": 8.298696935515132e-06,
      "loss": 0.3557,
      "step": 1718
    },
    {
      "epoch": 0.2238,
      "grad_norm": 0.6099835634231567,
      "learning_rate": 8.296072921287217e-06,
      "loss": 0.3429,
      "step": 1719
    },
    {
      "epoch": 0.224,
      "grad_norm": 0.6000272035598755,
      "learning_rate": 8.293447300593402e-06,
      "loss": 0.3493,
      "step": 1720
    },
    {
      "epoch": 0.2242,
      "grad_norm": 0.5075767636299133,
      "learning_rate": 8.290820074713383e-06,
      "loss": 0.3598,
      "step": 1721
    },
    {
      "epoch": 0.2244,
      "grad_norm": 0.5548860430717468,
      "learning_rate": 8.288191244927637e-06,
      "loss": 0.3648,
      "step": 1722
    },
    {
      "epoch": 0.2246,
      "grad_norm": 0.4220461845397949,
      "learning_rate": 8.285560812517423e-06,
      "loss": 0.3245,
      "step": 1723
    },
    {
      "epoch": 0.2248,
      "grad_norm": 0.7412163615226746,
      "learning_rate": 8.282928778764783e-06,
      "loss": 0.3434,
      "step": 1724
    },
    {
      "epoch": 0.225,
      "grad_norm": 0.5615155696868896,
      "learning_rate": 8.280295144952537e-06,
      "loss": 0.3382,
      "step": 1725
    },
    {
      "epoch": 0.2252,
      "grad_norm": 0.5223628282546997,
      "learning_rate": 8.277659912364288e-06,
      "loss": 0.3223,
      "step": 1726
    },
    {
      "epoch": 0.2254,
      "grad_norm": 0.44712507724761963,
      "learning_rate": 8.275023082284413e-06,
      "loss": 0.3394,
      "step": 1727
    },
    {
      "epoch": 0.2256,
      "grad_norm": 0.475110799074173,
      "learning_rate": 8.272384655998075e-06,
      "loss": 0.3457,
      "step": 1728
    },
    {
      "epoch": 0.2258,
      "grad_norm": 1.6953761577606201,
      "learning_rate": 8.269744634791207e-06,
      "loss": 0.3611,
      "step": 1729
    },
    {
      "epoch": 0.226,
      "grad_norm": 0.47163257002830505,
      "learning_rate": 8.267103019950529e-06,
      "loss": 0.3506,
      "step": 1730
    },
    {
      "epoch": 0.2262,
      "grad_norm": 0.4643881022930145,
      "learning_rate": 8.264459812763525e-06,
      "loss": 0.3664,
      "step": 1731
    },
    {
      "epoch": 0.2264,
      "grad_norm": 0.47999367117881775,
      "learning_rate": 8.261815014518465e-06,
      "loss": 0.3447,
      "step": 1732
    },
    {
      "epoch": 0.2266,
      "grad_norm": 0.6682723164558411,
      "learning_rate": 8.259168626504395e-06,
      "loss": 0.3342,
      "step": 1733
    },
    {
      "epoch": 0.2268,
      "grad_norm": 0.5405193567276001,
      "learning_rate": 8.256520650011126e-06,
      "loss": 0.3476,
      "step": 1734
    },
    {
      "epoch": 0.227,
      "grad_norm": 0.5075260996818542,
      "learning_rate": 8.253871086329255e-06,
      "loss": 0.3477,
      "step": 1735
    },
    {
      "epoch": 0.2272,
      "grad_norm": 0.49539700150489807,
      "learning_rate": 8.251219936750145e-06,
      "loss": 0.3611,
      "step": 1736
    },
    {
      "epoch": 0.2274,
      "grad_norm": 0.6321626305580139,
      "learning_rate": 8.248567202565934e-06,
      "loss": 0.3531,
      "step": 1737
    },
    {
      "epoch": 0.2276,
      "grad_norm": 0.5366260409355164,
      "learning_rate": 8.24591288506953e-06,
      "loss": 0.3405,
      "step": 1738
    },
    {
      "epoch": 0.2278,
      "grad_norm": 1.6333529949188232,
      "learning_rate": 8.243256985554622e-06,
      "loss": 0.3726,
      "step": 1739
    },
    {
      "epoch": 0.228,
      "grad_norm": 0.47081300616264343,
      "learning_rate": 8.240599505315656e-06,
      "loss": 0.3269,
      "step": 1740
    },
    {
      "epoch": 0.2282,
      "grad_norm": 0.48147860169410706,
      "learning_rate": 8.237940445647858e-06,
      "loss": 0.3103,
      "step": 1741
    },
    {
      "epoch": 0.2284,
      "grad_norm": 0.5155319571495056,
      "learning_rate": 8.235279807847223e-06,
      "loss": 0.3894,
      "step": 1742
    },
    {
      "epoch": 0.2286,
      "grad_norm": 0.4297954738140106,
      "learning_rate": 8.232617593210512e-06,
      "loss": 0.3293,
      "step": 1743
    },
    {
      "epoch": 0.2288,
      "grad_norm": 0.4423837959766388,
      "learning_rate": 8.229953803035256e-06,
      "loss": 0.3505,
      "step": 1744
    },
    {
      "epoch": 0.229,
      "grad_norm": 0.5715885758399963,
      "learning_rate": 8.227288438619754e-06,
      "loss": 0.3811,
      "step": 1745
    },
    {
      "epoch": 0.2292,
      "grad_norm": 0.49395740032196045,
      "learning_rate": 8.224621501263073e-06,
      "loss": 0.3509,
      "step": 1746
    },
    {
      "epoch": 0.2294,
      "grad_norm": 0.5235156416893005,
      "learning_rate": 8.221952992265046e-06,
      "loss": 0.3704,
      "step": 1747
    },
    {
      "epoch": 0.2296,
      "grad_norm": 0.45405644178390503,
      "learning_rate": 8.21928291292627e-06,
      "loss": 0.3428,
      "step": 1748
    },
    {
      "epoch": 0.2298,
      "grad_norm": 0.47186338901519775,
      "learning_rate": 8.21661126454811e-06,
      "loss": 0.3435,
      "step": 1749
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.4400550425052643,
      "learning_rate": 8.213938048432697e-06,
      "loss": 0.3473,
      "step": 1750
    },
    {
      "epoch": 0.2302,
      "grad_norm": 0.5584324598312378,
      "learning_rate": 8.211263265882923e-06,
      "loss": 0.3485,
      "step": 1751
    },
    {
      "epoch": 0.2304,
      "grad_norm": 0.5276550650596619,
      "learning_rate": 8.208586918202444e-06,
      "loss": 0.3785,
      "step": 1752
    },
    {
      "epoch": 0.2306,
      "grad_norm": 0.6978746652603149,
      "learning_rate": 8.205909006695679e-06,
      "loss": 0.3703,
      "step": 1753
    },
    {
      "epoch": 0.2308,
      "grad_norm": 0.49288398027420044,
      "learning_rate": 8.203229532667808e-06,
      "loss": 0.3182,
      "step": 1754
    },
    {
      "epoch": 0.231,
      "grad_norm": 0.46877527236938477,
      "learning_rate": 8.200548497424779e-06,
      "loss": 0.3464,
      "step": 1755
    },
    {
      "epoch": 0.2312,
      "grad_norm": 0.41701969504356384,
      "learning_rate": 8.197865902273291e-06,
      "loss": 0.3334,
      "step": 1756
    },
    {
      "epoch": 0.2314,
      "grad_norm": 0.5285722613334656,
      "learning_rate": 8.19518174852081e-06,
      "loss": 0.3671,
      "step": 1757
    },
    {
      "epoch": 0.2316,
      "grad_norm": 0.984215259552002,
      "learning_rate": 8.192496037475562e-06,
      "loss": 0.357,
      "step": 1758
    },
    {
      "epoch": 0.2318,
      "grad_norm": 0.8511497974395752,
      "learning_rate": 8.189808770446528e-06,
      "loss": 0.3546,
      "step": 1759
    },
    {
      "epoch": 0.232,
      "grad_norm": 0.5076833963394165,
      "learning_rate": 8.18711994874345e-06,
      "loss": 0.3323,
      "step": 1760
    },
    {
      "epoch": 0.2322,
      "grad_norm": 0.4626636803150177,
      "learning_rate": 8.184429573676825e-06,
      "loss": 0.3364,
      "step": 1761
    },
    {
      "epoch": 0.2324,
      "grad_norm": 0.46679437160491943,
      "learning_rate": 8.181737646557912e-06,
      "loss": 0.3089,
      "step": 1762
    },
    {
      "epoch": 0.2326,
      "grad_norm": 0.45653796195983887,
      "learning_rate": 8.179044168698722e-06,
      "loss": 0.3374,
      "step": 1763
    },
    {
      "epoch": 0.2328,
      "grad_norm": 0.49581649899482727,
      "learning_rate": 8.176349141412022e-06,
      "loss": 0.3198,
      "step": 1764
    },
    {
      "epoch": 0.233,
      "grad_norm": 0.6125779151916504,
      "learning_rate": 8.173652566011339e-06,
      "loss": 0.3556,
      "step": 1765
    },
    {
      "epoch": 0.2332,
      "grad_norm": 2.9884777069091797,
      "learning_rate": 8.170954443810947e-06,
      "loss": 0.4007,
      "step": 1766
    },
    {
      "epoch": 0.2334,
      "grad_norm": 0.6647442579269409,
      "learning_rate": 8.168254776125883e-06,
      "loss": 0.3391,
      "step": 1767
    },
    {
      "epoch": 0.2336,
      "grad_norm": 0.5352967977523804,
      "learning_rate": 8.165553564271928e-06,
      "loss": 0.3395,
      "step": 1768
    },
    {
      "epoch": 0.2338,
      "grad_norm": 0.5860040783882141,
      "learning_rate": 8.162850809565623e-06,
      "loss": 0.3276,
      "step": 1769
    },
    {
      "epoch": 0.234,
      "grad_norm": 0.4770520031452179,
      "learning_rate": 8.160146513324256e-06,
      "loss": 0.3178,
      "step": 1770
    },
    {
      "epoch": 0.2342,
      "grad_norm": 0.7810896039009094,
      "learning_rate": 8.157440676865866e-06,
      "loss": 0.344,
      "step": 1771
    },
    {
      "epoch": 0.2344,
      "grad_norm": 0.5418007969856262,
      "learning_rate": 8.154733301509249e-06,
      "loss": 0.3278,
      "step": 1772
    },
    {
      "epoch": 0.2346,
      "grad_norm": 1.0555593967437744,
      "learning_rate": 8.152024388573945e-06,
      "loss": 0.3516,
      "step": 1773
    },
    {
      "epoch": 0.2348,
      "grad_norm": 0.4922106862068176,
      "learning_rate": 8.149313939380244e-06,
      "loss": 0.3354,
      "step": 1774
    },
    {
      "epoch": 0.235,
      "grad_norm": 0.5214471220970154,
      "learning_rate": 8.146601955249187e-06,
      "loss": 0.3553,
      "step": 1775
    },
    {
      "epoch": 0.2352,
      "grad_norm": 0.5606301426887512,
      "learning_rate": 8.143888437502565e-06,
      "loss": 0.3403,
      "step": 1776
    },
    {
      "epoch": 0.2354,
      "grad_norm": 0.5911112427711487,
      "learning_rate": 8.141173387462908e-06,
      "loss": 0.3608,
      "step": 1777
    },
    {
      "epoch": 0.2356,
      "grad_norm": 0.5367752313613892,
      "learning_rate": 8.138456806453503e-06,
      "loss": 0.3273,
      "step": 1778
    },
    {
      "epoch": 0.2358,
      "grad_norm": 0.5187506675720215,
      "learning_rate": 8.135738695798377e-06,
      "loss": 0.3528,
      "step": 1779
    },
    {
      "epoch": 0.236,
      "grad_norm": 0.6090713739395142,
      "learning_rate": 8.133019056822303e-06,
      "loss": 0.3358,
      "step": 1780
    },
    {
      "epoch": 0.2362,
      "grad_norm": 0.404071182012558,
      "learning_rate": 8.130297890850803e-06,
      "loss": 0.3114,
      "step": 1781
    },
    {
      "epoch": 0.2364,
      "grad_norm": 0.9640373587608337,
      "learning_rate": 8.127575199210136e-06,
      "loss": 0.3601,
      "step": 1782
    },
    {
      "epoch": 0.2366,
      "grad_norm": 0.4254966676235199,
      "learning_rate": 8.124850983227313e-06,
      "loss": 0.3463,
      "step": 1783
    },
    {
      "epoch": 0.2368,
      "grad_norm": 0.6818743348121643,
      "learning_rate": 8.12212524423008e-06,
      "loss": 0.3573,
      "step": 1784
    },
    {
      "epoch": 0.237,
      "grad_norm": 0.4768708050251007,
      "learning_rate": 8.119397983546932e-06,
      "loss": 0.3494,
      "step": 1785
    },
    {
      "epoch": 0.2372,
      "grad_norm": 0.46920323371887207,
      "learning_rate": 8.116669202507102e-06,
      "loss": 0.3433,
      "step": 1786
    },
    {
      "epoch": 0.2374,
      "grad_norm": 0.6208242774009705,
      "learning_rate": 8.113938902440563e-06,
      "loss": 0.3599,
      "step": 1787
    },
    {
      "epoch": 0.2376,
      "grad_norm": 0.400144100189209,
      "learning_rate": 8.111207084678033e-06,
      "loss": 0.3317,
      "step": 1788
    },
    {
      "epoch": 0.2378,
      "grad_norm": 0.5043452382087708,
      "learning_rate": 8.108473750550965e-06,
      "loss": 0.3479,
      "step": 1789
    },
    {
      "epoch": 0.238,
      "grad_norm": 0.4860473871231079,
      "learning_rate": 8.105738901391553e-06,
      "loss": 0.3548,
      "step": 1790
    },
    {
      "epoch": 0.2382,
      "grad_norm": 0.5142382383346558,
      "learning_rate": 8.103002538532729e-06,
      "loss": 0.3249,
      "step": 1791
    },
    {
      "epoch": 0.2384,
      "grad_norm": 0.5613958835601807,
      "learning_rate": 8.100264663308165e-06,
      "loss": 0.3221,
      "step": 1792
    },
    {
      "epoch": 0.2386,
      "grad_norm": 0.45309245586395264,
      "learning_rate": 8.097525277052265e-06,
      "loss": 0.3351,
      "step": 1793
    },
    {
      "epoch": 0.2388,
      "grad_norm": 0.48897141218185425,
      "learning_rate": 8.094784381100174e-06,
      "loss": 0.3299,
      "step": 1794
    },
    {
      "epoch": 0.239,
      "grad_norm": 0.4601038694381714,
      "learning_rate": 8.092041976787772e-06,
      "loss": 0.3239,
      "step": 1795
    },
    {
      "epoch": 0.2392,
      "grad_norm": 0.4728291630744934,
      "learning_rate": 8.089298065451673e-06,
      "loss": 0.3555,
      "step": 1796
    },
    {
      "epoch": 0.2394,
      "grad_norm": 0.5368895530700684,
      "learning_rate": 8.086552648429225e-06,
      "loss": 0.3467,
      "step": 1797
    },
    {
      "epoch": 0.2396,
      "grad_norm": 0.7119045257568359,
      "learning_rate": 8.083805727058514e-06,
      "loss": 0.3583,
      "step": 1798
    },
    {
      "epoch": 0.2398,
      "grad_norm": 0.47481662034988403,
      "learning_rate": 8.081057302678352e-06,
      "loss": 0.3781,
      "step": 1799
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.4862152636051178,
      "learning_rate": 8.078307376628292e-06,
      "loss": 0.3515,
      "step": 1800
    },
    {
      "epoch": 0.2402,
      "grad_norm": 0.46981650590896606,
      "learning_rate": 8.075555950248613e-06,
      "loss": 0.3476,
      "step": 1801
    },
    {
      "epoch": 0.2404,
      "grad_norm": 0.42784491181373596,
      "learning_rate": 8.072803024880322e-06,
      "loss": 0.3363,
      "step": 1802
    },
    {
      "epoch": 0.2406,
      "grad_norm": 0.5026899576187134,
      "learning_rate": 8.07004860186517e-06,
      "loss": 0.3725,
      "step": 1803
    },
    {
      "epoch": 0.2408,
      "grad_norm": 0.5207549929618835,
      "learning_rate": 8.067292682545622e-06,
      "loss": 0.3777,
      "step": 1804
    },
    {
      "epoch": 0.241,
      "grad_norm": 0.47701454162597656,
      "learning_rate": 8.064535268264883e-06,
      "loss": 0.3327,
      "step": 1805
    },
    {
      "epoch": 0.2412,
      "grad_norm": 0.42694535851478577,
      "learning_rate": 8.061776360366883e-06,
      "loss": 0.3235,
      "step": 1806
    },
    {
      "epoch": 0.2414,
      "grad_norm": 0.523194432258606,
      "learning_rate": 8.05901596019628e-06,
      "loss": 0.3283,
      "step": 1807
    },
    {
      "epoch": 0.2416,
      "grad_norm": 0.47192296385765076,
      "learning_rate": 8.05625406909846e-06,
      "loss": 0.342,
      "step": 1808
    },
    {
      "epoch": 0.2418,
      "grad_norm": 0.5124325752258301,
      "learning_rate": 8.053490688419532e-06,
      "loss": 0.3466,
      "step": 1809
    },
    {
      "epoch": 0.242,
      "grad_norm": 0.4753555357456207,
      "learning_rate": 8.05072581950634e-06,
      "loss": 0.328,
      "step": 1810
    },
    {
      "epoch": 0.2422,
      "grad_norm": 1.0745570659637451,
      "learning_rate": 8.047959463706441e-06,
      "loss": 0.3674,
      "step": 1811
    },
    {
      "epoch": 0.2424,
      "grad_norm": 0.4051116704940796,
      "learning_rate": 8.045191622368128e-06,
      "loss": 0.321,
      "step": 1812
    },
    {
      "epoch": 0.2426,
      "grad_norm": 0.6549030542373657,
      "learning_rate": 8.04242229684041e-06,
      "loss": 0.336,
      "step": 1813
    },
    {
      "epoch": 0.2428,
      "grad_norm": 0.45452451705932617,
      "learning_rate": 8.039651488473028e-06,
      "loss": 0.3224,
      "step": 1814
    },
    {
      "epoch": 0.243,
      "grad_norm": 0.4192400574684143,
      "learning_rate": 8.036879198616434e-06,
      "loss": 0.3592,
      "step": 1815
    },
    {
      "epoch": 0.2432,
      "grad_norm": 0.6199817657470703,
      "learning_rate": 8.034105428621812e-06,
      "loss": 0.3323,
      "step": 1816
    },
    {
      "epoch": 0.2434,
      "grad_norm": 0.4550221860408783,
      "learning_rate": 8.031330179841062e-06,
      "loss": 0.3141,
      "step": 1817
    },
    {
      "epoch": 0.2436,
      "grad_norm": 0.5797282457351685,
      "learning_rate": 8.028553453626809e-06,
      "loss": 0.3458,
      "step": 1818
    },
    {
      "epoch": 0.2438,
      "grad_norm": 0.8500091433525085,
      "learning_rate": 8.02577525133239e-06,
      "loss": 0.3219,
      "step": 1819
    },
    {
      "epoch": 0.244,
      "grad_norm": 0.5768932700157166,
      "learning_rate": 8.022995574311876e-06,
      "loss": 0.3397,
      "step": 1820
    },
    {
      "epoch": 0.2442,
      "grad_norm": 0.4247712194919586,
      "learning_rate": 8.020214423920039e-06,
      "loss": 0.3337,
      "step": 1821
    },
    {
      "epoch": 0.2444,
      "grad_norm": 0.4556357264518738,
      "learning_rate": 8.017431801512384e-06,
      "loss": 0.2815,
      "step": 1822
    },
    {
      "epoch": 0.2446,
      "grad_norm": 0.629228949546814,
      "learning_rate": 8.014647708445124e-06,
      "loss": 0.3403,
      "step": 1823
    },
    {
      "epoch": 0.2448,
      "grad_norm": 0.55204838514328,
      "learning_rate": 8.011862146075194e-06,
      "loss": 0.3471,
      "step": 1824
    },
    {
      "epoch": 0.245,
      "grad_norm": 0.6197250485420227,
      "learning_rate": 8.009075115760243e-06,
      "loss": 0.3225,
      "step": 1825
    },
    {
      "epoch": 0.2452,
      "grad_norm": 0.5329393148422241,
      "learning_rate": 8.006286618858634e-06,
      "loss": 0.357,
      "step": 1826
    },
    {
      "epoch": 0.2454,
      "grad_norm": 0.6455016732215881,
      "learning_rate": 8.003496656729448e-06,
      "loss": 0.3223,
      "step": 1827
    },
    {
      "epoch": 0.2456,
      "grad_norm": 0.5010917782783508,
      "learning_rate": 8.000705230732478e-06,
      "loss": 0.3466,
      "step": 1828
    },
    {
      "epoch": 0.2458,
      "grad_norm": 0.4092196226119995,
      "learning_rate": 7.997912342228232e-06,
      "loss": 0.3196,
      "step": 1829
    },
    {
      "epoch": 0.246,
      "grad_norm": 1.062318205833435,
      "learning_rate": 7.99511799257793e-06,
      "loss": 0.3286,
      "step": 1830
    },
    {
      "epoch": 0.2462,
      "grad_norm": 0.5515164732933044,
      "learning_rate": 7.992322183143504e-06,
      "loss": 0.3367,
      "step": 1831
    },
    {
      "epoch": 0.2464,
      "grad_norm": 0.4319509267807007,
      "learning_rate": 7.989524915287595e-06,
      "loss": 0.3572,
      "step": 1832
    },
    {
      "epoch": 0.2466,
      "grad_norm": 0.49436935782432556,
      "learning_rate": 7.986726190373562e-06,
      "loss": 0.3425,
      "step": 1833
    },
    {
      "epoch": 0.2468,
      "grad_norm": 0.4285069406032562,
      "learning_rate": 7.983926009765464e-06,
      "loss": 0.3387,
      "step": 1834
    },
    {
      "epoch": 0.247,
      "grad_norm": 0.5270645022392273,
      "learning_rate": 7.981124374828079e-06,
      "loss": 0.3414,
      "step": 1835
    },
    {
      "epoch": 0.2472,
      "grad_norm": 1.420054316520691,
      "learning_rate": 7.978321286926892e-06,
      "loss": 0.3303,
      "step": 1836
    },
    {
      "epoch": 0.2474,
      "grad_norm": 0.5516757369041443,
      "learning_rate": 7.975516747428087e-06,
      "loss": 0.3555,
      "step": 1837
    },
    {
      "epoch": 0.2476,
      "grad_norm": 0.6075885891914368,
      "learning_rate": 7.972710757698567e-06,
      "loss": 0.3467,
      "step": 1838
    },
    {
      "epoch": 0.2478,
      "grad_norm": 0.5101215839385986,
      "learning_rate": 7.969903319105935e-06,
      "loss": 0.3846,
      "step": 1839
    },
    {
      "epoch": 0.248,
      "grad_norm": 0.37219661474227905,
      "learning_rate": 7.967094433018508e-06,
      "loss": 0.3269,
      "step": 1840
    },
    {
      "epoch": 0.2482,
      "grad_norm": 0.46790921688079834,
      "learning_rate": 7.964284100805297e-06,
      "loss": 0.3601,
      "step": 1841
    },
    {
      "epoch": 0.2484,
      "grad_norm": 0.5690418481826782,
      "learning_rate": 7.961472323836025e-06,
      "loss": 0.3521,
      "step": 1842
    },
    {
      "epoch": 0.2486,
      "grad_norm": 0.4146636128425598,
      "learning_rate": 7.95865910348112e-06,
      "loss": 0.3418,
      "step": 1843
    },
    {
      "epoch": 0.2488,
      "grad_norm": 0.42483121156692505,
      "learning_rate": 7.95584444111171e-06,
      "loss": 0.3597,
      "step": 1844
    },
    {
      "epoch": 0.249,
      "grad_norm": 0.622586190700531,
      "learning_rate": 7.953028338099628e-06,
      "loss": 0.3337,
      "step": 1845
    },
    {
      "epoch": 0.2492,
      "grad_norm": 0.48615866899490356,
      "learning_rate": 7.950210795817406e-06,
      "loss": 0.3336,
      "step": 1846
    },
    {
      "epoch": 0.2494,
      "grad_norm": 0.6236661076545715,
      "learning_rate": 7.947391815638284e-06,
      "loss": 0.3477,
      "step": 1847
    },
    {
      "epoch": 0.2496,
      "grad_norm": 0.5332852005958557,
      "learning_rate": 7.944571398936193e-06,
      "loss": 0.3446,
      "step": 1848
    },
    {
      "epoch": 0.2498,
      "grad_norm": 0.4931953251361847,
      "learning_rate": 7.941749547085778e-06,
      "loss": 0.3174,
      "step": 1849
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.47107163071632385,
      "learning_rate": 7.938926261462366e-06,
      "loss": 0.3443,
      "step": 1850
    },
    {
      "epoch": 0.2502,
      "grad_norm": 0.5304219722747803,
      "learning_rate": 7.936101543441998e-06,
      "loss": 0.3743,
      "step": 1851
    },
    {
      "epoch": 0.2504,
      "grad_norm": 0.4962477684020996,
      "learning_rate": 7.933275394401407e-06,
      "loss": 0.3381,
      "step": 1852
    },
    {
      "epoch": 0.2506,
      "grad_norm": 0.4331390857696533,
      "learning_rate": 7.930447815718022e-06,
      "loss": 0.3303,
      "step": 1853
    },
    {
      "epoch": 0.2508,
      "grad_norm": 0.5003830194473267,
      "learning_rate": 7.927618808769971e-06,
      "loss": 0.3311,
      "step": 1854
    },
    {
      "epoch": 0.251,
      "grad_norm": 0.43121203780174255,
      "learning_rate": 7.92478837493608e-06,
      "loss": 0.3541,
      "step": 1855
    },
    {
      "epoch": 0.2512,
      "grad_norm": 0.5378528833389282,
      "learning_rate": 7.921956515595861e-06,
      "loss": 0.3364,
      "step": 1856
    },
    {
      "epoch": 0.2514,
      "grad_norm": 0.40202540159225464,
      "learning_rate": 7.919123232129535e-06,
      "loss": 0.3244,
      "step": 1857
    },
    {
      "epoch": 0.2516,
      "grad_norm": 0.43961164355278015,
      "learning_rate": 7.916288525918008e-06,
      "loss": 0.3669,
      "step": 1858
    },
    {
      "epoch": 0.2518,
      "grad_norm": 0.5352548956871033,
      "learning_rate": 7.913452398342882e-06,
      "loss": 0.3374,
      "step": 1859
    },
    {
      "epoch": 0.252,
      "grad_norm": 0.45856818556785583,
      "learning_rate": 7.910614850786448e-06,
      "loss": 0.3332,
      "step": 1860
    },
    {
      "epoch": 0.2522,
      "grad_norm": 0.4582391381263733,
      "learning_rate": 7.907775884631694e-06,
      "loss": 0.338,
      "step": 1861
    },
    {
      "epoch": 0.2524,
      "grad_norm": 0.45242783427238464,
      "learning_rate": 7.904935501262301e-06,
      "loss": 0.3438,
      "step": 1862
    },
    {
      "epoch": 0.2526,
      "grad_norm": 0.5343955755233765,
      "learning_rate": 7.90209370206263e-06,
      "loss": 0.3495,
      "step": 1863
    },
    {
      "epoch": 0.2528,
      "grad_norm": 0.44888776540756226,
      "learning_rate": 7.899250488417746e-06,
      "loss": 0.3023,
      "step": 1864
    },
    {
      "epoch": 0.253,
      "grad_norm": 0.5676412582397461,
      "learning_rate": 7.896405861713393e-06,
      "loss": 0.3572,
      "step": 1865
    },
    {
      "epoch": 0.2532,
      "grad_norm": 0.4743630886077881,
      "learning_rate": 7.893559823336013e-06,
      "loss": 0.3316,
      "step": 1866
    },
    {
      "epoch": 0.2534,
      "grad_norm": 0.508740246295929,
      "learning_rate": 7.890712374672724e-06,
      "loss": 0.3422,
      "step": 1867
    },
    {
      "epoch": 0.2536,
      "grad_norm": 0.43512752652168274,
      "learning_rate": 7.887863517111337e-06,
      "loss": 0.3424,
      "step": 1868
    },
    {
      "epoch": 0.2538,
      "grad_norm": 0.44926151633262634,
      "learning_rate": 7.88501325204036e-06,
      "loss": 0.3255,
      "step": 1869
    },
    {
      "epoch": 0.254,
      "grad_norm": 0.5199503302574158,
      "learning_rate": 7.882161580848966e-06,
      "loss": 0.3749,
      "step": 1870
    },
    {
      "epoch": 0.2542,
      "grad_norm": 0.5011411309242249,
      "learning_rate": 7.879308504927034e-06,
      "loss": 0.3422,
      "step": 1871
    },
    {
      "epoch": 0.2544,
      "grad_norm": 0.562369167804718,
      "learning_rate": 7.876454025665114e-06,
      "loss": 0.3369,
      "step": 1872
    },
    {
      "epoch": 0.2546,
      "grad_norm": 0.5008328557014465,
      "learning_rate": 7.873598144454444e-06,
      "loss": 0.3835,
      "step": 1873
    },
    {
      "epoch": 0.2548,
      "grad_norm": 0.40572699904441833,
      "learning_rate": 7.87074086268695e-06,
      "loss": 0.2819,
      "step": 1874
    },
    {
      "epoch": 0.255,
      "grad_norm": 0.6950197219848633,
      "learning_rate": 7.86788218175523e-06,
      "loss": 0.3313,
      "step": 1875
    },
    {
      "epoch": 0.2552,
      "grad_norm": 0.5497646331787109,
      "learning_rate": 7.865022103052578e-06,
      "loss": 0.3409,
      "step": 1876
    },
    {
      "epoch": 0.2554,
      "grad_norm": 0.8487582206726074,
      "learning_rate": 7.862160627972956e-06,
      "loss": 0.381,
      "step": 1877
    },
    {
      "epoch": 0.2556,
      "grad_norm": 0.5277814269065857,
      "learning_rate": 7.859297757911013e-06,
      "loss": 0.3437,
      "step": 1878
    },
    {
      "epoch": 0.2558,
      "grad_norm": 0.467911958694458,
      "learning_rate": 7.856433494262078e-06,
      "loss": 0.3596,
      "step": 1879
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.44449159502983093,
      "learning_rate": 7.85356783842216e-06,
      "loss": 0.3207,
      "step": 1880
    },
    {
      "epoch": 0.2562,
      "grad_norm": 0.4760729670524597,
      "learning_rate": 7.850700791787941e-06,
      "loss": 0.3524,
      "step": 1881
    },
    {
      "epoch": 0.2564,
      "grad_norm": 0.4767281115055084,
      "learning_rate": 7.847832355756788e-06,
      "loss": 0.3737,
      "step": 1882
    },
    {
      "epoch": 0.2566,
      "grad_norm": 0.49986281991004944,
      "learning_rate": 7.844962531726742e-06,
      "loss": 0.3462,
      "step": 1883
    },
    {
      "epoch": 0.2568,
      "grad_norm": 0.7095549702644348,
      "learning_rate": 7.842091321096515e-06,
      "loss": 0.3599,
      "step": 1884
    },
    {
      "epoch": 0.257,
      "grad_norm": 0.7484524846076965,
      "learning_rate": 7.839218725265507e-06,
      "loss": 0.3379,
      "step": 1885
    },
    {
      "epoch": 0.2572,
      "grad_norm": 0.4525449573993683,
      "learning_rate": 7.836344745633785e-06,
      "loss": 0.3478,
      "step": 1886
    },
    {
      "epoch": 0.2574,
      "grad_norm": 0.6043052077293396,
      "learning_rate": 7.833469383602086e-06,
      "loss": 0.3794,
      "step": 1887
    },
    {
      "epoch": 0.2576,
      "grad_norm": 0.46917805075645447,
      "learning_rate": 7.830592640571833e-06,
      "loss": 0.3384,
      "step": 1888
    },
    {
      "epoch": 0.2578,
      "grad_norm": 0.7596787214279175,
      "learning_rate": 7.827714517945116e-06,
      "loss": 0.3424,
      "step": 1889
    },
    {
      "epoch": 0.258,
      "grad_norm": 0.5567331910133362,
      "learning_rate": 7.82483501712469e-06,
      "loss": 0.3605,
      "step": 1890
    },
    {
      "epoch": 0.2582,
      "grad_norm": 0.5787758231163025,
      "learning_rate": 7.821954139513997e-06,
      "loss": 0.3754,
      "step": 1891
    },
    {
      "epoch": 0.2584,
      "grad_norm": 0.39288538694381714,
      "learning_rate": 7.819071886517134e-06,
      "loss": 0.3225,
      "step": 1892
    },
    {
      "epoch": 0.2586,
      "grad_norm": 0.4381064772605896,
      "learning_rate": 7.816188259538885e-06,
      "loss": 0.307,
      "step": 1893
    },
    {
      "epoch": 0.2588,
      "grad_norm": 0.4751034080982208,
      "learning_rate": 7.813303259984685e-06,
      "loss": 0.3515,
      "step": 1894
    },
    {
      "epoch": 0.259,
      "grad_norm": 0.5139349102973938,
      "learning_rate": 7.810416889260653e-06,
      "loss": 0.3413,
      "step": 1895
    },
    {
      "epoch": 0.2592,
      "grad_norm": 0.5320213437080383,
      "learning_rate": 7.807529148773572e-06,
      "loss": 0.3377,
      "step": 1896
    },
    {
      "epoch": 0.2594,
      "grad_norm": 0.5293605923652649,
      "learning_rate": 7.80464003993089e-06,
      "loss": 0.3117,
      "step": 1897
    },
    {
      "epoch": 0.2596,
      "grad_norm": 0.541007399559021,
      "learning_rate": 7.801749564140724e-06,
      "loss": 0.3338,
      "step": 1898
    },
    {
      "epoch": 0.2598,
      "grad_norm": 0.6516367793083191,
      "learning_rate": 7.798857722811857e-06,
      "loss": 0.3101,
      "step": 1899
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.7375134229660034,
      "learning_rate": 7.795964517353734e-06,
      "loss": 0.3712,
      "step": 1900
    },
    {
      "epoch": 0.2602,
      "grad_norm": 0.5210551023483276,
      "learning_rate": 7.793069949176474e-06,
      "loss": 0.3456,
      "step": 1901
    },
    {
      "epoch": 0.2604,
      "grad_norm": 0.7789885401725769,
      "learning_rate": 7.79017401969085e-06,
      "loss": 0.3269,
      "step": 1902
    },
    {
      "epoch": 0.2606,
      "grad_norm": 0.67107754945755,
      "learning_rate": 7.787276730308304e-06,
      "loss": 0.3622,
      "step": 1903
    },
    {
      "epoch": 0.2608,
      "grad_norm": 0.4610104560852051,
      "learning_rate": 7.78437808244094e-06,
      "loss": 0.3351,
      "step": 1904
    },
    {
      "epoch": 0.261,
      "grad_norm": 0.5061287879943848,
      "learning_rate": 7.781478077501526e-06,
      "loss": 0.3253,
      "step": 1905
    },
    {
      "epoch": 0.2612,
      "grad_norm": 0.5118182897567749,
      "learning_rate": 7.778576716903484e-06,
      "loss": 0.3613,
      "step": 1906
    },
    {
      "epoch": 0.2614,
      "grad_norm": 0.44699108600616455,
      "learning_rate": 7.775674002060905e-06,
      "loss": 0.3458,
      "step": 1907
    },
    {
      "epoch": 0.2616,
      "grad_norm": 0.5182132720947266,
      "learning_rate": 7.772769934388537e-06,
      "loss": 0.3274,
      "step": 1908
    },
    {
      "epoch": 0.2618,
      "grad_norm": 0.4982025921344757,
      "learning_rate": 7.769864515301787e-06,
      "loss": 0.3642,
      "step": 1909
    },
    {
      "epoch": 0.262,
      "grad_norm": 0.5806662440299988,
      "learning_rate": 7.76695774621672e-06,
      "loss": 0.3391,
      "step": 1910
    },
    {
      "epoch": 0.2622,
      "grad_norm": 0.49668729305267334,
      "learning_rate": 7.764049628550063e-06,
      "loss": 0.3201,
      "step": 1911
    },
    {
      "epoch": 0.2624,
      "grad_norm": 0.5124812126159668,
      "learning_rate": 7.761140163719194e-06,
      "loss": 0.3961,
      "step": 1912
    },
    {
      "epoch": 0.2626,
      "grad_norm": 0.5066761374473572,
      "learning_rate": 7.758229353142153e-06,
      "loss": 0.3447,
      "step": 1913
    },
    {
      "epoch": 0.2628,
      "grad_norm": 0.5241420865058899,
      "learning_rate": 7.755317198237631e-06,
      "loss": 0.3332,
      "step": 1914
    },
    {
      "epoch": 0.263,
      "grad_norm": 0.4833812415599823,
      "learning_rate": 7.752403700424978e-06,
      "loss": 0.326,
      "step": 1915
    },
    {
      "epoch": 0.2632,
      "grad_norm": 0.502720057964325,
      "learning_rate": 7.7494888611242e-06,
      "loss": 0.3485,
      "step": 1916
    },
    {
      "epoch": 0.2634,
      "grad_norm": 0.7095934152603149,
      "learning_rate": 7.74657268175595e-06,
      "loss": 0.3195,
      "step": 1917
    },
    {
      "epoch": 0.2636,
      "grad_norm": 0.4401277005672455,
      "learning_rate": 7.743655163741544e-06,
      "loss": 0.3325,
      "step": 1918
    },
    {
      "epoch": 0.2638,
      "grad_norm": 0.4732799232006073,
      "learning_rate": 7.740736308502939e-06,
      "loss": 0.35,
      "step": 1919
    },
    {
      "epoch": 0.264,
      "grad_norm": 1.0283637046813965,
      "learning_rate": 7.737816117462752e-06,
      "loss": 0.3332,
      "step": 1920
    },
    {
      "epoch": 0.2642,
      "grad_norm": 0.5007339715957642,
      "learning_rate": 7.734894592044249e-06,
      "loss": 0.3555,
      "step": 1921
    },
    {
      "epoch": 0.2644,
      "grad_norm": 0.5074384212493896,
      "learning_rate": 7.731971733671347e-06,
      "loss": 0.3399,
      "step": 1922
    },
    {
      "epoch": 0.2646,
      "grad_norm": 0.5747765898704529,
      "learning_rate": 7.729047543768608e-06,
      "loss": 0.3483,
      "step": 1923
    },
    {
      "epoch": 0.2648,
      "grad_norm": 0.5095292925834656,
      "learning_rate": 7.726122023761252e-06,
      "loss": 0.3395,
      "step": 1924
    },
    {
      "epoch": 0.265,
      "grad_norm": 0.5077722072601318,
      "learning_rate": 7.723195175075136e-06,
      "loss": 0.3402,
      "step": 1925
    },
    {
      "epoch": 0.2652,
      "grad_norm": 0.39059382677078247,
      "learning_rate": 7.720266999136774e-06,
      "loss": 0.329,
      "step": 1926
    },
    {
      "epoch": 0.2654,
      "grad_norm": 0.4563088119029999,
      "learning_rate": 7.717337497373324e-06,
      "loss": 0.3498,
      "step": 1927
    },
    {
      "epoch": 0.2656,
      "grad_norm": 0.43444493412971497,
      "learning_rate": 7.714406671212589e-06,
      "loss": 0.3597,
      "step": 1928
    },
    {
      "epoch": 0.2658,
      "grad_norm": 0.413984477519989,
      "learning_rate": 7.711474522083015e-06,
      "loss": 0.3381,
      "step": 1929
    },
    {
      "epoch": 0.266,
      "grad_norm": 0.4515305757522583,
      "learning_rate": 7.7085410514137e-06,
      "loss": 0.3144,
      "step": 1930
    },
    {
      "epoch": 0.2662,
      "grad_norm": 0.40716683864593506,
      "learning_rate": 7.70560626063438e-06,
      "loss": 0.3447,
      "step": 1931
    },
    {
      "epoch": 0.2664,
      "grad_norm": 0.5051158666610718,
      "learning_rate": 7.702670151175435e-06,
      "loss": 0.355,
      "step": 1932
    },
    {
      "epoch": 0.2666,
      "grad_norm": 0.5400509238243103,
      "learning_rate": 7.699732724467894e-06,
      "loss": 0.3167,
      "step": 1933
    },
    {
      "epoch": 0.2668,
      "grad_norm": 0.44298356771469116,
      "learning_rate": 7.696793981943418e-06,
      "loss": 0.3474,
      "step": 1934
    },
    {
      "epoch": 0.267,
      "grad_norm": 0.6215180158615112,
      "learning_rate": 7.693853925034316e-06,
      "loss": 0.4146,
      "step": 1935
    },
    {
      "epoch": 0.2672,
      "grad_norm": 0.4682755768299103,
      "learning_rate": 7.690912555173536e-06,
      "loss": 0.3389,
      "step": 1936
    },
    {
      "epoch": 0.2674,
      "grad_norm": 0.41878724098205566,
      "learning_rate": 7.687969873794667e-06,
      "loss": 0.3489,
      "step": 1937
    },
    {
      "epoch": 0.2676,
      "grad_norm": 0.5345345139503479,
      "learning_rate": 7.685025882331936e-06,
      "loss": 0.3527,
      "step": 1938
    },
    {
      "epoch": 0.2678,
      "grad_norm": 0.6548563241958618,
      "learning_rate": 7.682080582220206e-06,
      "loss": 0.3043,
      "step": 1939
    },
    {
      "epoch": 0.268,
      "grad_norm": 0.5123564600944519,
      "learning_rate": 7.679133974894984e-06,
      "loss": 0.342,
      "step": 1940
    },
    {
      "epoch": 0.2682,
      "grad_norm": 0.3957490622997284,
      "learning_rate": 7.676186061792408e-06,
      "loss": 0.2845,
      "step": 1941
    },
    {
      "epoch": 0.2684,
      "grad_norm": 0.4236975610256195,
      "learning_rate": 7.673236844349257e-06,
      "loss": 0.3284,
      "step": 1942
    },
    {
      "epoch": 0.2686,
      "grad_norm": 0.4744342863559723,
      "learning_rate": 7.670286324002943e-06,
      "loss": 0.3398,
      "step": 1943
    },
    {
      "epoch": 0.2688,
      "grad_norm": 0.4888283610343933,
      "learning_rate": 7.667334502191514e-06,
      "loss": 0.3522,
      "step": 1944
    },
    {
      "epoch": 0.269,
      "grad_norm": 0.454204797744751,
      "learning_rate": 7.66438138035365e-06,
      "loss": 0.3603,
      "step": 1945
    },
    {
      "epoch": 0.2692,
      "grad_norm": 0.457830548286438,
      "learning_rate": 7.66142695992867e-06,
      "loss": 0.3767,
      "step": 1946
    },
    {
      "epoch": 0.2694,
      "grad_norm": 0.3910222053527832,
      "learning_rate": 7.658471242356521e-06,
      "loss": 0.3437,
      "step": 1947
    },
    {
      "epoch": 0.2696,
      "grad_norm": 0.4219287931919098,
      "learning_rate": 7.655514229077784e-06,
      "loss": 0.3553,
      "step": 1948
    },
    {
      "epoch": 0.2698,
      "grad_norm": 0.4466967284679413,
      "learning_rate": 7.652555921533671e-06,
      "loss": 0.3336,
      "step": 1949
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.44050106406211853,
      "learning_rate": 7.649596321166024e-06,
      "loss": 0.316,
      "step": 1950
    },
    {
      "epoch": 0.2702,
      "grad_norm": 0.5580093860626221,
      "learning_rate": 7.646635429417322e-06,
      "loss": 0.3022,
      "step": 1951
    },
    {
      "epoch": 0.2704,
      "grad_norm": 0.7066740989685059,
      "learning_rate": 7.64367324773066e-06,
      "loss": 0.3822,
      "step": 1952
    },
    {
      "epoch": 0.2706,
      "grad_norm": 0.418081134557724,
      "learning_rate": 7.640709777549773e-06,
      "loss": 0.3527,
      "step": 1953
    },
    {
      "epoch": 0.2708,
      "grad_norm": 0.473508358001709,
      "learning_rate": 7.637745020319019e-06,
      "loss": 0.3645,
      "step": 1954
    },
    {
      "epoch": 0.271,
      "grad_norm": 0.6344271302223206,
      "learning_rate": 7.634778977483389e-06,
      "loss": 0.3558,
      "step": 1955
    },
    {
      "epoch": 0.2712,
      "grad_norm": 0.5457620024681091,
      "learning_rate": 7.63181165048849e-06,
      "loss": 0.3383,
      "step": 1956
    },
    {
      "epoch": 0.0002,
      "grad_norm": 0.5247291922569275,
      "learning_rate": 7.628843040780567e-06,
      "loss": 0.3323,
      "step": 1957
    },
    {
      "epoch": 0.0004,
      "grad_norm": 0.4888128340244293,
      "learning_rate": 7.6258731498064796e-06,
      "loss": 0.346,
      "step": 1958
    },
    {
      "epoch": 0.0006,
      "grad_norm": 0.5200962424278259,
      "learning_rate": 7.622901979013717e-06,
      "loss": 0.3228,
      "step": 1959
    },
    {
      "epoch": 0.0008,
      "grad_norm": 0.48957309126853943,
      "learning_rate": 7.619929529850397e-06,
      "loss": 0.341,
      "step": 1960
    },
    {
      "epoch": 0.001,
      "grad_norm": 0.3713715076446533,
      "learning_rate": 7.616955803765249e-06,
      "loss": 0.3174,
      "step": 1961
    },
    {
      "epoch": 0.0012,
      "grad_norm": 0.5366963148117065,
      "learning_rate": 7.613980802207633e-06,
      "loss": 0.3409,
      "step": 1962
    },
    {
      "epoch": 0.0014,
      "grad_norm": 0.4886382818222046,
      "learning_rate": 7.6110045266275305e-06,
      "loss": 0.3363,
      "step": 1963
    },
    {
      "epoch": 0.0016,
      "grad_norm": 0.5133734345436096,
      "learning_rate": 7.6080269784755405e-06,
      "loss": 0.3335,
      "step": 1964
    },
    {
      "epoch": 0.0018,
      "grad_norm": 0.5300120115280151,
      "learning_rate": 7.605048159202884e-06,
      "loss": 0.3444,
      "step": 1965
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.8394526243209839,
      "learning_rate": 7.6020680702613995e-06,
      "loss": 0.3275,
      "step": 1966
    },
    {
      "epoch": 0.0022,
      "grad_norm": 0.48089638352394104,
      "learning_rate": 7.5990867131035474e-06,
      "loss": 0.3569,
      "step": 1967
    },
    {
      "epoch": 0.0024,
      "grad_norm": 0.4852306842803955,
      "learning_rate": 7.596104089182408e-06,
      "loss": 0.3239,
      "step": 1968
    },
    {
      "epoch": 0.0026,
      "grad_norm": 0.47947973012924194,
      "learning_rate": 7.5931201999516715e-06,
      "loss": 0.3388,
      "step": 1969
    },
    {
      "epoch": 0.0028,
      "grad_norm": 0.49630337953567505,
      "learning_rate": 7.590135046865652e-06,
      "loss": 0.3441,
      "step": 1970
    },
    {
      "epoch": 0.003,
      "grad_norm": 0.5084373950958252,
      "learning_rate": 7.587148631379276e-06,
      "loss": 0.3461,
      "step": 1971
    },
    {
      "epoch": 0.0032,
      "grad_norm": 0.49386221170425415,
      "learning_rate": 7.5841609549480854e-06,
      "loss": 0.3804,
      "step": 1972
    },
    {
      "epoch": 0.0034,
      "grad_norm": 0.502303957939148,
      "learning_rate": 7.581172019028238e-06,
      "loss": 0.3587,
      "step": 1973
    },
    {
      "epoch": 0.0036,
      "grad_norm": 0.6351799368858337,
      "learning_rate": 7.578181825076506e-06,
      "loss": 0.3284,
      "step": 1974
    },
    {
      "epoch": 0.0038,
      "grad_norm": 0.9249539971351624,
      "learning_rate": 7.575190374550272e-06,
      "loss": 0.3668,
      "step": 1975
    },
    {
      "epoch": 0.004,
      "grad_norm": 0.4281349778175354,
      "learning_rate": 7.572197668907533e-06,
      "loss": 0.3419,
      "step": 1976
    },
    {
      "epoch": 0.0042,
      "grad_norm": 0.5452465415000916,
      "learning_rate": 7.569203709606898e-06,
      "loss": 0.3745,
      "step": 1977
    },
    {
      "epoch": 0.0044,
      "grad_norm": 0.4201561510562897,
      "learning_rate": 7.566208498107586e-06,
      "loss": 0.3465,
      "step": 1978
    },
    {
      "epoch": 0.0046,
      "grad_norm": 0.5286946892738342,
      "learning_rate": 7.563212035869426e-06,
      "loss": 0.3203,
      "step": 1979
    },
    {
      "epoch": 0.0048,
      "grad_norm": 0.431071937084198,
      "learning_rate": 7.560214324352858e-06,
      "loss": 0.3158,
      "step": 1980
    },
    {
      "epoch": 0.005,
      "grad_norm": 0.5442622303962708,
      "learning_rate": 7.55721536501893e-06,
      "loss": 0.3446,
      "step": 1981
    },
    {
      "epoch": 0.0052,
      "grad_norm": 0.5946123600006104,
      "learning_rate": 7.5542151593293e-06,
      "loss": 0.3658,
      "step": 1982
    },
    {
      "epoch": 0.0054,
      "grad_norm": 0.4926307201385498,
      "learning_rate": 7.55121370874623e-06,
      "loss": 0.3614,
      "step": 1983
    },
    {
      "epoch": 0.0056,
      "grad_norm": 0.7171974182128906,
      "learning_rate": 7.548211014732589e-06,
      "loss": 0.3213,
      "step": 1984
    },
    {
      "epoch": 0.0058,
      "grad_norm": 0.4702602028846741,
      "learning_rate": 7.545207078751858e-06,
      "loss": 0.3391,
      "step": 1985
    },
    {
      "epoch": 0.006,
      "grad_norm": 0.7097549438476562,
      "learning_rate": 7.542201902268115e-06,
      "loss": 0.3406,
      "step": 1986
    },
    {
      "epoch": 0.0062,
      "grad_norm": 0.5041736960411072,
      "learning_rate": 7.539195486746047e-06,
      "loss": 0.3594,
      "step": 1987
    },
    {
      "epoch": 0.0064,
      "grad_norm": 0.546208918094635,
      "learning_rate": 7.536187833650947e-06,
      "loss": 0.3647,
      "step": 1988
    },
    {
      "epoch": 0.0066,
      "grad_norm": 0.3883594572544098,
      "learning_rate": 7.533178944448705e-06,
      "loss": 0.3242,
      "step": 1989
    },
    {
      "epoch": 0.0068,
      "grad_norm": 0.5467747449874878,
      "learning_rate": 7.530168820605819e-06,
      "loss": 0.3491,
      "step": 1990
    },
    {
      "epoch": 0.007,
      "grad_norm": 0.45307931303977966,
      "learning_rate": 7.527157463589389e-06,
      "loss": 0.3149,
      "step": 1991
    },
    {
      "epoch": 0.0072,
      "grad_norm": 0.5452401638031006,
      "learning_rate": 7.52414487486711e-06,
      "loss": 0.3756,
      "step": 1992
    },
    {
      "epoch": 0.0074,
      "grad_norm": 0.4953776001930237,
      "learning_rate": 7.521131055907283e-06,
      "loss": 0.3492,
      "step": 1993
    },
    {
      "epoch": 0.0076,
      "grad_norm": 0.4388226866722107,
      "learning_rate": 7.518116008178805e-06,
      "loss": 0.3626,
      "step": 1994
    },
    {
      "epoch": 0.0078,
      "grad_norm": 0.5581281781196594,
      "learning_rate": 7.515099733151177e-06,
      "loss": 0.3255,
      "step": 1995
    },
    {
      "epoch": 0.008,
      "grad_norm": 0.5194584727287292,
      "learning_rate": 7.512082232294491e-06,
      "loss": 0.3507,
      "step": 1996
    },
    {
      "epoch": 0.0082,
      "grad_norm": 0.47394344210624695,
      "learning_rate": 7.509063507079443e-06,
      "loss": 0.3799,
      "step": 1997
    },
    {
      "epoch": 0.0084,
      "grad_norm": 0.7874463796615601,
      "learning_rate": 7.5060435589773215e-06,
      "loss": 0.3491,
      "step": 1998
    },
    {
      "epoch": 0.0086,
      "grad_norm": 0.4236779808998108,
      "learning_rate": 7.503022389460014e-06,
      "loss": 0.3493,
      "step": 1999
    },
    {
      "epoch": 0.0088,
      "grad_norm": 0.4569052755832672,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.3581,
      "step": 2000
    },
    {
      "epoch": 0.009,
      "grad_norm": 0.523994505405426,
      "learning_rate": 7.496976392070358e-06,
      "loss": 0.3479,
      "step": 2001
    },
    {
      "epoch": 0.0092,
      "grad_norm": 0.557335376739502,
      "learning_rate": 7.493951567144755e-06,
      "loss": 0.3703,
      "step": 2002
    },
    {
      "epoch": 0.0094,
      "grad_norm": 0.48520904779434204,
      "learning_rate": 7.490925526697455e-06,
      "loss": 0.3433,
      "step": 2003
    },
    {
      "epoch": 0.0096,
      "grad_norm": 0.489317387342453,
      "learning_rate": 7.487898272203314e-06,
      "loss": 0.3448,
      "step": 2004
    },
    {
      "epoch": 0.0098,
      "grad_norm": 0.4408498704433441,
      "learning_rate": 7.484869805137778e-06,
      "loss": 0.3792,
      "step": 2005
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5164101719856262,
      "learning_rate": 7.481840126976885e-06,
      "loss": 0.3933,
      "step": 2006
    },
    {
      "epoch": 0.0102,
      "grad_norm": 0.5376119017601013,
      "learning_rate": 7.478809239197264e-06,
      "loss": 0.3422,
      "step": 2007
    },
    {
      "epoch": 0.0104,
      "grad_norm": 0.5083914399147034,
      "learning_rate": 7.475777143276133e-06,
      "loss": 0.3371,
      "step": 2008
    },
    {
      "epoch": 0.0106,
      "grad_norm": 0.5116211771965027,
      "learning_rate": 7.4727438406912986e-06,
      "loss": 0.3402,
      "step": 2009
    },
    {
      "epoch": 0.0108,
      "grad_norm": 0.4717080295085907,
      "learning_rate": 7.469709332921155e-06,
      "loss": 0.3487,
      "step": 2010
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.6862502694129944,
      "learning_rate": 7.4666736214446855e-06,
      "loss": 0.3746,
      "step": 2011
    },
    {
      "epoch": 0.0112,
      "grad_norm": 0.5812677145004272,
      "learning_rate": 7.463636707741458e-06,
      "loss": 0.3252,
      "step": 2012
    },
    {
      "epoch": 0.0114,
      "grad_norm": 0.7191124558448792,
      "learning_rate": 7.460598593291628e-06,
      "loss": 0.3795,
      "step": 2013
    },
    {
      "epoch": 0.0116,
      "grad_norm": 0.49589139223098755,
      "learning_rate": 7.4575592795759356e-06,
      "loss": 0.3183,
      "step": 2014
    },
    {
      "epoch": 0.0118,
      "grad_norm": 0.6598015427589417,
      "learning_rate": 7.454518768075705e-06,
      "loss": 0.3077,
      "step": 2015
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.48124298453330994,
      "learning_rate": 7.451477060272844e-06,
      "loss": 0.3319,
      "step": 2016
    },
    {
      "epoch": 0.0122,
      "grad_norm": 0.44923171401023865,
      "learning_rate": 7.448434157649846e-06,
      "loss": 0.3481,
      "step": 2017
    },
    {
      "epoch": 0.0124,
      "grad_norm": 0.5113219022750854,
      "learning_rate": 7.445390061689782e-06,
      "loss": 0.3835,
      "step": 2018
    },
    {
      "epoch": 0.0126,
      "grad_norm": 0.5863579511642456,
      "learning_rate": 7.44234477387631e-06,
      "loss": 0.3396,
      "step": 2019
    },
    {
      "epoch": 0.0128,
      "grad_norm": 0.44309714436531067,
      "learning_rate": 7.4392982956936644e-06,
      "loss": 0.3574,
      "step": 2020
    },
    {
      "epoch": 0.013,
      "grad_norm": 0.45570123195648193,
      "learning_rate": 7.436250628626662e-06,
      "loss": 0.3376,
      "step": 2021
    },
    {
      "epoch": 0.0132,
      "grad_norm": 0.5220116376876831,
      "learning_rate": 7.433201774160701e-06,
      "loss": 0.3589,
      "step": 2022
    },
    {
      "epoch": 0.0134,
      "grad_norm": 0.6465103030204773,
      "learning_rate": 7.430151733781752e-06,
      "loss": 0.36,
      "step": 2023
    },
    {
      "epoch": 0.0136,
      "grad_norm": 0.48689383268356323,
      "learning_rate": 7.42710050897637e-06,
      "loss": 0.3401,
      "step": 2024
    },
    {
      "epoch": 0.0138,
      "grad_norm": 0.7006555199623108,
      "learning_rate": 7.424048101231687e-06,
      "loss": 0.3477,
      "step": 2025
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.5522056818008423,
      "learning_rate": 7.4209945120354045e-06,
      "loss": 0.3539,
      "step": 2026
    },
    {
      "epoch": 0.0142,
      "grad_norm": 0.579373836517334,
      "learning_rate": 7.4179397428758085e-06,
      "loss": 0.3907,
      "step": 2027
    },
    {
      "epoch": 0.0144,
      "grad_norm": 1.0325504541397095,
      "learning_rate": 7.414883795241754e-06,
      "loss": 0.3622,
      "step": 2028
    },
    {
      "epoch": 0.0146,
      "grad_norm": 0.601061999797821,
      "learning_rate": 7.411826670622676e-06,
      "loss": 0.3554,
      "step": 2029
    },
    {
      "epoch": 0.0148,
      "grad_norm": 0.5156790614128113,
      "learning_rate": 7.408768370508577e-06,
      "loss": 0.3831,
      "step": 2030
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.3972161114215851,
      "learning_rate": 7.405708896390037e-06,
      "loss": 0.2978,
      "step": 2031
    },
    {
      "epoch": 0.0152,
      "grad_norm": 0.5262656211853027,
      "learning_rate": 7.402648249758204e-06,
      "loss": 0.3382,
      "step": 2032
    },
    {
      "epoch": 0.0154,
      "grad_norm": 0.4250008165836334,
      "learning_rate": 7.3995864321048036e-06,
      "loss": 0.336,
      "step": 2033
    },
    {
      "epoch": 0.0156,
      "grad_norm": 0.5237293243408203,
      "learning_rate": 7.396523444922126e-06,
      "loss": 0.3471,
      "step": 2034
    },
    {
      "epoch": 0.0158,
      "grad_norm": 0.4458763003349304,
      "learning_rate": 7.393459289703035e-06,
      "loss": 0.33,
      "step": 2035
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.5107957124710083,
      "learning_rate": 7.390393967940962e-06,
      "loss": 0.3388,
      "step": 2036
    },
    {
      "epoch": 0.0162,
      "grad_norm": 0.48422038555145264,
      "learning_rate": 7.3873274811299065e-06,
      "loss": 0.3508,
      "step": 2037
    },
    {
      "epoch": 0.0164,
      "grad_norm": 0.44265034794807434,
      "learning_rate": 7.3842598307644396e-06,
      "loss": 0.3509,
      "step": 2038
    },
    {
      "epoch": 0.0166,
      "grad_norm": 0.4492121636867523,
      "learning_rate": 7.381191018339697e-06,
      "loss": 0.3315,
      "step": 2039
    },
    {
      "epoch": 0.0168,
      "grad_norm": 0.454904705286026,
      "learning_rate": 7.378121045351378e-06,
      "loss": 0.33,
      "step": 2040
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.4073432981967926,
      "learning_rate": 7.37504991329575e-06,
      "loss": 0.3311,
      "step": 2041
    },
    {
      "epoch": 0.0172,
      "grad_norm": 0.5167952179908752,
      "learning_rate": 7.371977623669646e-06,
      "loss": 0.3516,
      "step": 2042
    },
    {
      "epoch": 0.0174,
      "grad_norm": 0.5046054124832153,
      "learning_rate": 7.368904177970466e-06,
      "loss": 0.371,
      "step": 2043
    },
    {
      "epoch": 0.0176,
      "grad_norm": 0.5715007185935974,
      "learning_rate": 7.365829577696166e-06,
      "loss": 0.3739,
      "step": 2044
    },
    {
      "epoch": 0.0178,
      "grad_norm": 0.5807520747184753,
      "learning_rate": 7.362753824345271e-06,
      "loss": 0.3673,
      "step": 2045
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.602292001247406,
      "learning_rate": 7.3596769194168646e-06,
      "loss": 0.3514,
      "step": 2046
    },
    {
      "epoch": 0.0182,
      "grad_norm": 0.47205105423927307,
      "learning_rate": 7.3565988644105926e-06,
      "loss": 0.3498,
      "step": 2047
    },
    {
      "epoch": 0.0184,
      "grad_norm": 0.3608076870441437,
      "learning_rate": 7.353519660826665e-06,
      "loss": 0.3116,
      "step": 2048
    },
    {
      "epoch": 0.0186,
      "grad_norm": 0.4525229036808014,
      "learning_rate": 7.350439310165842e-06,
      "loss": 0.3287,
      "step": 2049
    },
    {
      "epoch": 0.0188,
      "grad_norm": 0.4847429096698761,
      "learning_rate": 7.347357813929455e-06,
      "loss": 0.334,
      "step": 2050
    },
    {
      "epoch": 0.019,
      "grad_norm": 0.5081378817558289,
      "learning_rate": 7.344275173619385e-06,
      "loss": 0.3155,
      "step": 2051
    },
    {
      "epoch": 0.0192,
      "grad_norm": 0.563098132610321,
      "learning_rate": 7.341191390738073e-06,
      "loss": 0.3237,
      "step": 2052
    },
    {
      "epoch": 0.0194,
      "grad_norm": 0.4151829779148102,
      "learning_rate": 7.33810646678852e-06,
      "loss": 0.333,
      "step": 2053
    },
    {
      "epoch": 0.0196,
      "grad_norm": 0.5238422751426697,
      "learning_rate": 7.335020403274277e-06,
      "loss": 0.3457,
      "step": 2054
    },
    {
      "epoch": 0.0198,
      "grad_norm": 0.3864416480064392,
      "learning_rate": 7.3319332016994575e-06,
      "loss": 0.3388,
      "step": 2055
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5464614033699036,
      "learning_rate": 7.3288448635687215e-06,
      "loss": 0.3582,
      "step": 2056
    },
    {
      "epoch": 0.0202,
      "grad_norm": 0.4646517038345337,
      "learning_rate": 7.325755390387293e-06,
      "loss": 0.3452,
      "step": 2057
    },
    {
      "epoch": 0.0204,
      "grad_norm": 0.4267030656337738,
      "learning_rate": 7.32266478366094e-06,
      "loss": 0.3471,
      "step": 2058
    },
    {
      "epoch": 0.0206,
      "grad_norm": 0.506901204586029,
      "learning_rate": 7.319573044895986e-06,
      "loss": 0.3397,
      "step": 2059
    },
    {
      "epoch": 0.0208,
      "grad_norm": 0.4126685857772827,
      "learning_rate": 7.31648017559931e-06,
      "loss": 0.3392,
      "step": 2060
    },
    {
      "epoch": 0.021,
      "grad_norm": 0.48399507999420166,
      "learning_rate": 7.313386177278335e-06,
      "loss": 0.3122,
      "step": 2061
    },
    {
      "epoch": 0.0212,
      "grad_norm": 0.4047775864601135,
      "learning_rate": 7.310291051441044e-06,
      "loss": 0.3437,
      "step": 2062
    },
    {
      "epoch": 0.0214,
      "grad_norm": 0.6905864477157593,
      "learning_rate": 7.307194799595958e-06,
      "loss": 0.3336,
      "step": 2063
    },
    {
      "epoch": 0.0216,
      "grad_norm": 0.47369927167892456,
      "learning_rate": 7.3040974232521555e-06,
      "loss": 0.3281,
      "step": 2064
    },
    {
      "epoch": 0.0218,
      "grad_norm": 0.4820784032344818,
      "learning_rate": 7.300998923919259e-06,
      "loss": 0.3609,
      "step": 2065
    },
    {
      "epoch": 0.022,
      "grad_norm": 0.4639589488506317,
      "learning_rate": 7.297899303107441e-06,
      "loss": 0.3537,
      "step": 2066
    },
    {
      "epoch": 0.0222,
      "grad_norm": 0.8419828414916992,
      "learning_rate": 7.294798562327417e-06,
      "loss": 0.3332,
      "step": 2067
    },
    {
      "epoch": 0.0224,
      "grad_norm": 0.5323438048362732,
      "learning_rate": 7.291696703090449e-06,
      "loss": 0.3176,
      "step": 2068
    },
    {
      "epoch": 0.0226,
      "grad_norm": 0.6084098815917969,
      "learning_rate": 7.288593726908351e-06,
      "loss": 0.3525,
      "step": 2069
    },
    {
      "epoch": 0.0228,
      "grad_norm": 0.807461142539978,
      "learning_rate": 7.285489635293472e-06,
      "loss": 0.3308,
      "step": 2070
    },
    {
      "epoch": 0.023,
      "grad_norm": 0.5121889114379883,
      "learning_rate": 7.282384429758709e-06,
      "loss": 0.3705,
      "step": 2071
    },
    {
      "epoch": 0.0232,
      "grad_norm": 0.4141233563423157,
      "learning_rate": 7.279278111817502e-06,
      "loss": 0.3123,
      "step": 2072
    },
    {
      "epoch": 0.0234,
      "grad_norm": 0.6033976078033447,
      "learning_rate": 7.27617068298383e-06,
      "loss": 0.323,
      "step": 2073
    },
    {
      "epoch": 0.0236,
      "grad_norm": 0.9547064304351807,
      "learning_rate": 7.27306214477222e-06,
      "loss": 0.3481,
      "step": 2074
    },
    {
      "epoch": 0.0238,
      "grad_norm": 0.5374829173088074,
      "learning_rate": 7.269952498697734e-06,
      "loss": 0.3634,
      "step": 2075
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.514323890209198,
      "learning_rate": 7.266841746275977e-06,
      "loss": 0.3624,
      "step": 2076
    },
    {
      "epoch": 0.0242,
      "grad_norm": 0.4654398262500763,
      "learning_rate": 7.26372988902309e-06,
      "loss": 0.3302,
      "step": 2077
    },
    {
      "epoch": 0.0244,
      "grad_norm": 0.4034883379936218,
      "learning_rate": 7.260616928455754e-06,
      "loss": 0.3559,
      "step": 2078
    },
    {
      "epoch": 0.0246,
      "grad_norm": 0.6969311237335205,
      "learning_rate": 7.257502866091192e-06,
      "loss": 0.3592,
      "step": 2079
    },
    {
      "epoch": 0.0248,
      "grad_norm": 0.5164963603019714,
      "learning_rate": 7.254387703447154e-06,
      "loss": 0.3561,
      "step": 2080
    },
    {
      "epoch": 0.025,
      "grad_norm": 0.47014549374580383,
      "learning_rate": 7.251271442041938e-06,
      "loss": 0.3309,
      "step": 2081
    },
    {
      "epoch": 0.0252,
      "grad_norm": 0.6408646106719971,
      "learning_rate": 7.24815408339437e-06,
      "loss": 0.3399,
      "step": 2082
    },
    {
      "epoch": 0.0254,
      "grad_norm": 0.46495363116264343,
      "learning_rate": 7.245035629023812e-06,
      "loss": 0.3493,
      "step": 2083
    },
    {
      "epoch": 0.0256,
      "grad_norm": 0.5733674168586731,
      "learning_rate": 7.241916080450163e-06,
      "loss": 0.3363,
      "step": 2084
    },
    {
      "epoch": 0.0258,
      "grad_norm": 0.9009961485862732,
      "learning_rate": 7.238795439193849e-06,
      "loss": 0.3265,
      "step": 2085
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.5520738959312439,
      "learning_rate": 7.235673706775837e-06,
      "loss": 0.3588,
      "step": 2086
    },
    {
      "epoch": 0.0262,
      "grad_norm": 0.576304018497467,
      "learning_rate": 7.2325508847176175e-06,
      "loss": 0.3422,
      "step": 2087
    },
    {
      "epoch": 0.0264,
      "grad_norm": 0.44095030426979065,
      "learning_rate": 7.2294269745412214e-06,
      "loss": 0.336,
      "step": 2088
    },
    {
      "epoch": 0.0266,
      "grad_norm": 0.4962732791900635,
      "learning_rate": 7.226301977769199e-06,
      "loss": 0.3507,
      "step": 2089
    },
    {
      "epoch": 0.0268,
      "grad_norm": 0.5811390280723572,
      "learning_rate": 7.223175895924638e-06,
      "loss": 0.3443,
      "step": 2090
    },
    {
      "epoch": 0.027,
      "grad_norm": 0.4818621873855591,
      "learning_rate": 7.220048730531154e-06,
      "loss": 0.3522,
      "step": 2091
    },
    {
      "epoch": 0.0272,
      "grad_norm": 0.4917876720428467,
      "learning_rate": 7.216920483112886e-06,
      "loss": 0.3478,
      "step": 2092
    },
    {
      "epoch": 0.0274,
      "grad_norm": 0.46301141381263733,
      "learning_rate": 7.21379115519451e-06,
      "loss": 0.3572,
      "step": 2093
    },
    {
      "epoch": 0.0276,
      "grad_norm": 0.5757348537445068,
      "learning_rate": 7.210660748301214e-06,
      "loss": 0.3348,
      "step": 2094
    },
    {
      "epoch": 0.0278,
      "grad_norm": 0.5995206236839294,
      "learning_rate": 7.207529263958727e-06,
      "loss": 0.3226,
      "step": 2095
    },
    {
      "epoch": 0.028,
      "grad_norm": 0.5566779971122742,
      "learning_rate": 7.2043967036932935e-06,
      "loss": 0.3671,
      "step": 2096
    },
    {
      "epoch": 0.0282,
      "grad_norm": 0.5118270516395569,
      "learning_rate": 7.201263069031686e-06,
      "loss": 0.3744,
      "step": 2097
    },
    {
      "epoch": 0.0284,
      "grad_norm": 0.5504781007766724,
      "learning_rate": 7.1981283615012e-06,
      "loss": 0.3561,
      "step": 2098
    },
    {
      "epoch": 0.0286,
      "grad_norm": 0.5034992694854736,
      "learning_rate": 7.194992582629654e-06,
      "loss": 0.3365,
      "step": 2099
    },
    {
      "epoch": 0.0288,
      "grad_norm": 0.5536651015281677,
      "learning_rate": 7.191855733945388e-06,
      "loss": 0.3642,
      "step": 2100
    },
    {
      "epoch": 0.029,
      "grad_norm": 1.0141940116882324,
      "learning_rate": 7.188717816977264e-06,
      "loss": 0.3521,
      "step": 2101
    },
    {
      "epoch": 0.0292,
      "grad_norm": 0.5349705815315247,
      "learning_rate": 7.185578833254665e-06,
      "loss": 0.3302,
      "step": 2102
    },
    {
      "epoch": 0.0294,
      "grad_norm": 0.4643726646900177,
      "learning_rate": 7.182438784307495e-06,
      "loss": 0.3746,
      "step": 2103
    },
    {
      "epoch": 0.0296,
      "grad_norm": 0.5080054998397827,
      "learning_rate": 7.179297671666171e-06,
      "loss": 0.3057,
      "step": 2104
    },
    {
      "epoch": 0.0298,
      "grad_norm": 0.42130422592163086,
      "learning_rate": 7.176155496861639e-06,
      "loss": 0.3124,
      "step": 2105
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5823014974594116,
      "learning_rate": 7.173012261425352e-06,
      "loss": 0.3832,
      "step": 2106
    },
    {
      "epoch": 0.0302,
      "grad_norm": 0.493636816740036,
      "learning_rate": 7.169867966889288e-06,
      "loss": 0.3483,
      "step": 2107
    },
    {
      "epoch": 0.0304,
      "grad_norm": 0.5518170595169067,
      "learning_rate": 7.166722614785937e-06,
      "loss": 0.359,
      "step": 2108
    },
    {
      "epoch": 0.0306,
      "grad_norm": 0.4645494222640991,
      "learning_rate": 7.1635762066483035e-06,
      "loss": 0.3528,
      "step": 2109
    },
    {
      "epoch": 0.0308,
      "grad_norm": 0.535865306854248,
      "learning_rate": 7.160428744009913e-06,
      "loss": 0.3818,
      "step": 2110
    },
    {
      "epoch": 0.031,
      "grad_norm": 0.4758320748806,
      "learning_rate": 7.157280228404796e-06,
      "loss": 0.3454,
      "step": 2111
    },
    {
      "epoch": 0.0312,
      "grad_norm": 0.4180643856525421,
      "learning_rate": 7.154130661367503e-06,
      "loss": 0.3562,
      "step": 2112
    },
    {
      "epoch": 0.0314,
      "grad_norm": 0.5592344999313354,
      "learning_rate": 7.150980044433094e-06,
      "loss": 0.3234,
      "step": 2113
    },
    {
      "epoch": 0.0316,
      "grad_norm": 0.6586885452270508,
      "learning_rate": 7.1478283791371415e-06,
      "loss": 0.3049,
      "step": 2114
    },
    {
      "epoch": 0.0318,
      "grad_norm": 0.4379710555076599,
      "learning_rate": 7.1446756670157306e-06,
      "loss": 0.3421,
      "step": 2115
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.3934008777141571,
      "learning_rate": 7.141521909605452e-06,
      "loss": 0.351,
      "step": 2116
    },
    {
      "epoch": 0.0322,
      "grad_norm": 0.49233245849609375,
      "learning_rate": 7.138367108443411e-06,
      "loss": 0.3317,
      "step": 2117
    },
    {
      "epoch": 0.0324,
      "grad_norm": 0.45837852358818054,
      "learning_rate": 7.135211265067217e-06,
      "loss": 0.332,
      "step": 2118
    },
    {
      "epoch": 0.0326,
      "grad_norm": 0.6981955170631409,
      "learning_rate": 7.1320543810149945e-06,
      "loss": 0.3234,
      "step": 2119
    },
    {
      "epoch": 0.0328,
      "grad_norm": 0.5222969651222229,
      "learning_rate": 7.128896457825364e-06,
      "loss": 0.3312,
      "step": 2120
    },
    {
      "epoch": 0.033,
      "grad_norm": 0.48079466819763184,
      "learning_rate": 7.125737497037464e-06,
      "loss": 0.3292,
      "step": 2121
    },
    {
      "epoch": 0.0332,
      "grad_norm": 0.4566836655139923,
      "learning_rate": 7.12257750019093e-06,
      "loss": 0.3159,
      "step": 2122
    },
    {
      "epoch": 0.0334,
      "grad_norm": 0.4272584617137909,
      "learning_rate": 7.119416468825908e-06,
      "loss": 0.372,
      "step": 2123
    },
    {
      "epoch": 0.0336,
      "grad_norm": 0.5171928405761719,
      "learning_rate": 7.116254404483049e-06,
      "loss": 0.3504,
      "step": 2124
    },
    {
      "epoch": 0.0338,
      "grad_norm": 0.5979363918304443,
      "learning_rate": 7.113091308703498e-06,
      "loss": 0.3717,
      "step": 2125
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.42977452278137207,
      "learning_rate": 7.1099271830289155e-06,
      "loss": 0.3378,
      "step": 2126
    },
    {
      "epoch": 0.0342,
      "grad_norm": 0.5406255722045898,
      "learning_rate": 7.106762029001455e-06,
      "loss": 0.3654,
      "step": 2127
    },
    {
      "epoch": 0.0344,
      "grad_norm": 0.4411236643791199,
      "learning_rate": 7.103595848163775e-06,
      "loss": 0.3534,
      "step": 2128
    },
    {
      "epoch": 0.0346,
      "grad_norm": 0.6039050817489624,
      "learning_rate": 7.100428642059033e-06,
      "loss": 0.3394,
      "step": 2129
    },
    {
      "epoch": 0.0348,
      "grad_norm": 0.5494290590286255,
      "learning_rate": 7.0972604122308865e-06,
      "loss": 0.3132,
      "step": 2130
    },
    {
      "epoch": 0.035,
      "grad_norm": 0.41832444071769714,
      "learning_rate": 7.094091160223493e-06,
      "loss": 0.3414,
      "step": 2131
    },
    {
      "epoch": 0.0352,
      "grad_norm": 0.45470890402793884,
      "learning_rate": 7.090920887581507e-06,
      "loss": 0.362,
      "step": 2132
    },
    {
      "epoch": 0.0354,
      "grad_norm": 0.46628719568252563,
      "learning_rate": 7.087749595850084e-06,
      "loss": 0.3408,
      "step": 2133
    },
    {
      "epoch": 0.0356,
      "grad_norm": 0.5218459963798523,
      "learning_rate": 7.0845772865748684e-06,
      "loss": 0.3636,
      "step": 2134
    },
    {
      "epoch": 0.0358,
      "grad_norm": 0.4710923135280609,
      "learning_rate": 7.081403961302007e-06,
      "loss": 0.3316,
      "step": 2135
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.48754340410232544,
      "learning_rate": 7.07822962157814e-06,
      "loss": 0.3484,
      "step": 2136
    },
    {
      "epoch": 0.0362,
      "grad_norm": 1.6779743432998657,
      "learning_rate": 7.075054268950402e-06,
      "loss": 0.38,
      "step": 2137
    },
    {
      "epoch": 0.0364,
      "grad_norm": 0.457530677318573,
      "learning_rate": 7.071877904966422e-06,
      "loss": 0.3573,
      "step": 2138
    },
    {
      "epoch": 0.0366,
      "grad_norm": 0.42116186022758484,
      "learning_rate": 7.0687005311743195e-06,
      "loss": 0.2872,
      "step": 2139
    },
    {
      "epoch": 0.0368,
      "grad_norm": 0.5083756446838379,
      "learning_rate": 7.06552214912271e-06,
      "loss": 0.3285,
      "step": 2140
    },
    {
      "epoch": 0.037,
      "grad_norm": 0.5251450538635254,
      "learning_rate": 7.0623427603606965e-06,
      "loss": 0.3688,
      "step": 2141
    },
    {
      "epoch": 0.0372,
      "grad_norm": 0.6173996329307556,
      "learning_rate": 7.059162366437875e-06,
      "loss": 0.3651,
      "step": 2142
    },
    {
      "epoch": 0.0374,
      "grad_norm": 1.5173795223236084,
      "learning_rate": 7.0559809689043325e-06,
      "loss": 0.3347,
      "step": 2143
    },
    {
      "epoch": 0.0376,
      "grad_norm": 0.5438259243965149,
      "learning_rate": 7.052798569310641e-06,
      "loss": 0.3526,
      "step": 2144
    },
    {
      "epoch": 0.0378,
      "grad_norm": 0.45668718218803406,
      "learning_rate": 7.049615169207864e-06,
      "loss": 0.337,
      "step": 2145
    },
    {
      "epoch": 0.038,
      "grad_norm": 0.5020473003387451,
      "learning_rate": 7.0464307701475544e-06,
      "loss": 0.3573,
      "step": 2146
    },
    {
      "epoch": 0.0382,
      "grad_norm": 0.5550861954689026,
      "learning_rate": 7.043245373681746e-06,
      "loss": 0.3722,
      "step": 2147
    },
    {
      "epoch": 0.0384,
      "grad_norm": 0.514333963394165,
      "learning_rate": 7.0400589813629645e-06,
      "loss": 0.372,
      "step": 2148
    },
    {
      "epoch": 0.0386,
      "grad_norm": 0.44160163402557373,
      "learning_rate": 7.036871594744218e-06,
      "loss": 0.357,
      "step": 2149
    },
    {
      "epoch": 0.0388,
      "grad_norm": 0.5745121240615845,
      "learning_rate": 7.033683215379002e-06,
      "loss": 0.3579,
      "step": 2150
    },
    {
      "epoch": 0.039,
      "grad_norm": 0.5203642249107361,
      "learning_rate": 7.030493844821291e-06,
      "loss": 0.3183,
      "step": 2151
    },
    {
      "epoch": 0.0392,
      "grad_norm": 0.4967232644557953,
      "learning_rate": 7.027303484625547e-06,
      "loss": 0.3266,
      "step": 2152
    },
    {
      "epoch": 0.0394,
      "grad_norm": 0.5169956088066101,
      "learning_rate": 7.024112136346713e-06,
      "loss": 0.3472,
      "step": 2153
    },
    {
      "epoch": 0.0396,
      "grad_norm": 0.48188161849975586,
      "learning_rate": 7.0209198015402115e-06,
      "loss": 0.3292,
      "step": 2154
    },
    {
      "epoch": 0.0398,
      "grad_norm": 0.47783270478248596,
      "learning_rate": 7.0177264817619514e-06,
      "loss": 0.3219,
      "step": 2155
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.421047568321228,
      "learning_rate": 7.014532178568314e-06,
      "loss": 0.3326,
      "step": 2156
    },
    {
      "epoch": 0.0402,
      "grad_norm": 0.613612174987793,
      "learning_rate": 7.011336893516167e-06,
      "loss": 0.3508,
      "step": 2157
    },
    {
      "epoch": 0.0404,
      "grad_norm": 0.4223123788833618,
      "learning_rate": 7.008140628162851e-06,
      "loss": 0.3269,
      "step": 2158
    },
    {
      "epoch": 0.0406,
      "grad_norm": 0.44851329922676086,
      "learning_rate": 7.0049433840661875e-06,
      "loss": 0.3518,
      "step": 2159
    },
    {
      "epoch": 0.0408,
      "grad_norm": 0.5525901317596436,
      "learning_rate": 7.0017451627844765e-06,
      "loss": 0.3314,
      "step": 2160
    },
    {
      "epoch": 0.041,
      "grad_norm": 0.506540060043335,
      "learning_rate": 6.998545965876489e-06,
      "loss": 0.3716,
      "step": 2161
    },
    {
      "epoch": 0.0412,
      "grad_norm": 0.6638846397399902,
      "learning_rate": 6.995345794901477e-06,
      "loss": 0.3414,
      "step": 2162
    },
    {
      "epoch": 0.0414,
      "grad_norm": 0.6431050300598145,
      "learning_rate": 6.992144651419163e-06,
      "loss": 0.3583,
      "step": 2163
    },
    {
      "epoch": 0.0416,
      "grad_norm": 0.41391798853874207,
      "learning_rate": 6.98894253698975e-06,
      "loss": 0.3319,
      "step": 2164
    },
    {
      "epoch": 0.0418,
      "grad_norm": 0.48459935188293457,
      "learning_rate": 6.985739453173903e-06,
      "loss": 0.3315,
      "step": 2165
    },
    {
      "epoch": 0.042,
      "grad_norm": 2.0750956535339355,
      "learning_rate": 6.9825354015327715e-06,
      "loss": 0.3697,
      "step": 2166
    },
    {
      "epoch": 0.0422,
      "grad_norm": 0.46460264921188354,
      "learning_rate": 6.979330383627969e-06,
      "loss": 0.3353,
      "step": 2167
    },
    {
      "epoch": 0.0424,
      "grad_norm": 0.49684199690818787,
      "learning_rate": 6.976124401021583e-06,
      "loss": 0.3561,
      "step": 2168
    },
    {
      "epoch": 0.0426,
      "grad_norm": 0.4995613694190979,
      "learning_rate": 6.97291745527617e-06,
      "loss": 0.3598,
      "step": 2169
    },
    {
      "epoch": 0.0428,
      "grad_norm": 0.5035189390182495,
      "learning_rate": 6.9697095479547564e-06,
      "loss": 0.3285,
      "step": 2170
    },
    {
      "epoch": 0.043,
      "grad_norm": 0.6212769746780396,
      "learning_rate": 6.966500680620837e-06,
      "loss": 0.3189,
      "step": 2171
    },
    {
      "epoch": 0.0432,
      "grad_norm": 0.46024197340011597,
      "learning_rate": 6.963290854838376e-06,
      "loss": 0.329,
      "step": 2172
    },
    {
      "epoch": 0.0434,
      "grad_norm": 0.5148010849952698,
      "learning_rate": 6.960080072171802e-06,
      "loss": 0.3323,
      "step": 2173
    },
    {
      "epoch": 0.0436,
      "grad_norm": 0.3952443599700928,
      "learning_rate": 6.9568683341860135e-06,
      "loss": 0.3207,
      "step": 2174
    },
    {
      "epoch": 0.0438,
      "grad_norm": 0.526573896408081,
      "learning_rate": 6.953655642446368e-06,
      "loss": 0.3574,
      "step": 2175
    },
    {
      "epoch": 0.044,
      "grad_norm": 0.4333203434944153,
      "learning_rate": 6.950441998518699e-06,
      "loss": 0.3472,
      "step": 2176
    },
    {
      "epoch": 0.0442,
      "grad_norm": 0.5106233358383179,
      "learning_rate": 6.947227403969293e-06,
      "loss": 0.3704,
      "step": 2177
    },
    {
      "epoch": 0.0444,
      "grad_norm": 0.5527244806289673,
      "learning_rate": 6.944011860364905e-06,
      "loss": 0.3746,
      "step": 2178
    },
    {
      "epoch": 0.0446,
      "grad_norm": 5.292844295501709,
      "learning_rate": 6.940795369272754e-06,
      "loss": 0.3324,
      "step": 2179
    },
    {
      "epoch": 0.0448,
      "grad_norm": 0.4603300988674164,
      "learning_rate": 6.9375779322605154e-06,
      "loss": 0.3335,
      "step": 2180
    },
    {
      "epoch": 0.045,
      "grad_norm": 0.48820754885673523,
      "learning_rate": 6.934359550896332e-06,
      "loss": 0.345,
      "step": 2181
    },
    {
      "epoch": 0.0452,
      "grad_norm": 0.44726765155792236,
      "learning_rate": 6.9311402267488004e-06,
      "loss": 0.3636,
      "step": 2182
    },
    {
      "epoch": 0.0454,
      "grad_norm": 0.5312230587005615,
      "learning_rate": 6.927919961386984e-06,
      "loss": 0.3447,
      "step": 2183
    },
    {
      "epoch": 0.0456,
      "grad_norm": 0.4886765778064728,
      "learning_rate": 6.924698756380398e-06,
      "loss": 0.3588,
      "step": 2184
    },
    {
      "epoch": 0.0458,
      "grad_norm": 0.520865797996521,
      "learning_rate": 6.921476613299018e-06,
      "loss": 0.3475,
      "step": 2185
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.5539115071296692,
      "learning_rate": 6.9182535337132824e-06,
      "loss": 0.3451,
      "step": 2186
    },
    {
      "epoch": 0.0462,
      "grad_norm": 0.47452935576438904,
      "learning_rate": 6.915029519194076e-06,
      "loss": 0.3238,
      "step": 2187
    },
    {
      "epoch": 0.0464,
      "grad_norm": 0.5918989181518555,
      "learning_rate": 6.911804571312746e-06,
      "loss": 0.3316,
      "step": 2188
    },
    {
      "epoch": 0.0466,
      "grad_norm": 0.541374146938324,
      "learning_rate": 6.908578691641092e-06,
      "loss": 0.341,
      "step": 2189
    },
    {
      "epoch": 0.0468,
      "grad_norm": 0.9480854868888855,
      "learning_rate": 6.905351881751372e-06,
      "loss": 0.3589,
      "step": 2190
    },
    {
      "epoch": 0.047,
      "grad_norm": 0.3979714810848236,
      "learning_rate": 6.9021241432162886e-06,
      "loss": 0.3082,
      "step": 2191
    },
    {
      "epoch": 0.0472,
      "grad_norm": 0.4741816818714142,
      "learning_rate": 6.898895477609007e-06,
      "loss": 0.3176,
      "step": 2192
    },
    {
      "epoch": 0.0474,
      "grad_norm": 0.5086230039596558,
      "learning_rate": 6.895665886503136e-06,
      "loss": 0.3547,
      "step": 2193
    },
    {
      "epoch": 0.0476,
      "grad_norm": 0.6380074620246887,
      "learning_rate": 6.892435371472741e-06,
      "loss": 0.378,
      "step": 2194
    },
    {
      "epoch": 0.0478,
      "grad_norm": 0.7122999429702759,
      "learning_rate": 6.889203934092337e-06,
      "loss": 0.3272,
      "step": 2195
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.4187851846218109,
      "learning_rate": 6.885971575936884e-06,
      "loss": 0.3644,
      "step": 2196
    },
    {
      "epoch": 0.0482,
      "grad_norm": 0.5588025450706482,
      "learning_rate": 6.882738298581797e-06,
      "loss": 0.375,
      "step": 2197
    },
    {
      "epoch": 0.0484,
      "grad_norm": 0.5181788206100464,
      "learning_rate": 6.879504103602934e-06,
      "loss": 0.351,
      "step": 2198
    },
    {
      "epoch": 0.0486,
      "grad_norm": 0.4586130380630493,
      "learning_rate": 6.876268992576605e-06,
      "loss": 0.3441,
      "step": 2199
    },
    {
      "epoch": 0.0488,
      "grad_norm": 0.5605753064155579,
      "learning_rate": 6.873032967079562e-06,
      "loss": 0.3357,
      "step": 2200
    },
    {
      "epoch": 0.049,
      "grad_norm": 0.4614698588848114,
      "learning_rate": 6.869796028689002e-06,
      "loss": 0.3433,
      "step": 2201
    },
    {
      "epoch": 0.0492,
      "grad_norm": 0.4657248854637146,
      "learning_rate": 6.866558178982575e-06,
      "loss": 0.3478,
      "step": 2202
    },
    {
      "epoch": 0.0494,
      "grad_norm": 0.48945602774620056,
      "learning_rate": 6.863319419538366e-06,
      "loss": 0.3552,
      "step": 2203
    },
    {
      "epoch": 0.0496,
      "grad_norm": 0.4465474486351013,
      "learning_rate": 6.860079751934908e-06,
      "loss": 0.3219,
      "step": 2204
    },
    {
      "epoch": 0.0498,
      "grad_norm": 0.5487223863601685,
      "learning_rate": 6.856839177751175e-06,
      "loss": 0.3673,
      "step": 2205
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4497894048690796,
      "learning_rate": 6.853597698566583e-06,
      "loss": 0.3179,
      "step": 2206
    },
    {
      "epoch": 0.0502,
      "grad_norm": 0.4526189863681793,
      "learning_rate": 6.850355315960992e-06,
      "loss": 0.3182,
      "step": 2207
    },
    {
      "epoch": 0.0504,
      "grad_norm": 1.4277551174163818,
      "learning_rate": 6.847112031514698e-06,
      "loss": 0.3203,
      "step": 2208
    },
    {
      "epoch": 0.0506,
      "grad_norm": 0.4962864816188812,
      "learning_rate": 6.843867846808438e-06,
      "loss": 0.3241,
      "step": 2209
    },
    {
      "epoch": 0.0508,
      "grad_norm": 0.528592050075531,
      "learning_rate": 6.840622763423391e-06,
      "loss": 0.3455,
      "step": 2210
    },
    {
      "epoch": 0.051,
      "grad_norm": 0.4863801598548889,
      "learning_rate": 6.837376782941168e-06,
      "loss": 0.352,
      "step": 2211
    },
    {
      "epoch": 0.0512,
      "grad_norm": 0.5002415776252747,
      "learning_rate": 6.834129906943822e-06,
      "loss": 0.3149,
      "step": 2212
    },
    {
      "epoch": 0.0514,
      "grad_norm": 0.44608214497566223,
      "learning_rate": 6.830882137013839e-06,
      "loss": 0.3185,
      "step": 2213
    },
    {
      "epoch": 0.0516,
      "grad_norm": 0.5437160134315491,
      "learning_rate": 6.827633474734145e-06,
      "loss": 0.3757,
      "step": 2214
    },
    {
      "epoch": 0.0518,
      "grad_norm": 0.48347461223602295,
      "learning_rate": 6.824383921688098e-06,
      "loss": 0.36,
      "step": 2215
    },
    {
      "epoch": 0.052,
      "grad_norm": 0.5304652452468872,
      "learning_rate": 6.821133479459492e-06,
      "loss": 0.3614,
      "step": 2216
    },
    {
      "epoch": 0.0522,
      "grad_norm": 0.47104841470718384,
      "learning_rate": 6.81788214963255e-06,
      "loss": 0.3631,
      "step": 2217
    },
    {
      "epoch": 0.0524,
      "grad_norm": 0.8750721216201782,
      "learning_rate": 6.814629933791932e-06,
      "loss": 0.3291,
      "step": 2218
    },
    {
      "epoch": 0.0526,
      "grad_norm": 0.4055868089199066,
      "learning_rate": 6.811376833522729e-06,
      "loss": 0.3199,
      "step": 2219
    },
    {
      "epoch": 0.0528,
      "grad_norm": 0.47547656297683716,
      "learning_rate": 6.808122850410461e-06,
      "loss": 0.3402,
      "step": 2220
    },
    {
      "epoch": 0.053,
      "grad_norm": 0.5648210644721985,
      "learning_rate": 6.804867986041084e-06,
      "loss": 0.3463,
      "step": 2221
    },
    {
      "epoch": 0.0532,
      "grad_norm": 0.9300905466079712,
      "learning_rate": 6.8016122420009745e-06,
      "loss": 0.3467,
      "step": 2222
    },
    {
      "epoch": 0.0534,
      "grad_norm": 0.5517810583114624,
      "learning_rate": 6.798355619876944e-06,
      "loss": 0.3369,
      "step": 2223
    },
    {
      "epoch": 0.0536,
      "grad_norm": 0.4621276557445526,
      "learning_rate": 6.7950981212562315e-06,
      "loss": 0.3673,
      "step": 2224
    },
    {
      "epoch": 0.0538,
      "grad_norm": 0.5088163018226624,
      "learning_rate": 6.7918397477265e-06,
      "loss": 0.3428,
      "step": 2225
    },
    {
      "epoch": 0.054,
      "grad_norm": 0.5402028560638428,
      "learning_rate": 6.788580500875848e-06,
      "loss": 0.3439,
      "step": 2226
    },
    {
      "epoch": 0.0542,
      "grad_norm": 0.6210148334503174,
      "learning_rate": 6.785320382292783e-06,
      "loss": 0.3593,
      "step": 2227
    },
    {
      "epoch": 0.0544,
      "grad_norm": 0.5209822654724121,
      "learning_rate": 6.782059393566254e-06,
      "loss": 0.3677,
      "step": 2228
    },
    {
      "epoch": 0.0546,
      "grad_norm": 0.4449623227119446,
      "learning_rate": 6.778797536285625e-06,
      "loss": 0.3143,
      "step": 2229
    },
    {
      "epoch": 0.0548,
      "grad_norm": 0.5518540143966675,
      "learning_rate": 6.775534812040686e-06,
      "loss": 0.3506,
      "step": 2230
    },
    {
      "epoch": 0.055,
      "grad_norm": 0.4874712824821472,
      "learning_rate": 6.772271222421649e-06,
      "loss": 0.3684,
      "step": 2231
    },
    {
      "epoch": 0.0552,
      "grad_norm": 0.45094040036201477,
      "learning_rate": 6.769006769019147e-06,
      "loss": 0.333,
      "step": 2232
    },
    {
      "epoch": 0.0554,
      "grad_norm": 0.5510659217834473,
      "learning_rate": 6.765741453424237e-06,
      "loss": 0.379,
      "step": 2233
    },
    {
      "epoch": 0.0556,
      "grad_norm": 0.4283663034439087,
      "learning_rate": 6.762475277228393e-06,
      "loss": 0.3231,
      "step": 2234
    },
    {
      "epoch": 0.0558,
      "grad_norm": 0.5620145797729492,
      "learning_rate": 6.759208242023509e-06,
      "loss": 0.325,
      "step": 2235
    },
    {
      "epoch": 0.056,
      "grad_norm": 0.8665119409561157,
      "learning_rate": 6.755940349401901e-06,
      "loss": 0.3718,
      "step": 2236
    },
    {
      "epoch": 0.0562,
      "grad_norm": 0.4215657114982605,
      "learning_rate": 6.752671600956295e-06,
      "loss": 0.3322,
      "step": 2237
    },
    {
      "epoch": 0.0564,
      "grad_norm": 0.5102256536483765,
      "learning_rate": 6.749401998279845e-06,
      "loss": 0.3375,
      "step": 2238
    },
    {
      "epoch": 0.0566,
      "grad_norm": 0.42943525314331055,
      "learning_rate": 6.746131542966112e-06,
      "loss": 0.3422,
      "step": 2239
    },
    {
      "epoch": 0.0568,
      "grad_norm": 0.4518658220767975,
      "learning_rate": 6.7428602366090764e-06,
      "loss": 0.3371,
      "step": 2240
    },
    {
      "epoch": 0.057,
      "grad_norm": 0.6692396402359009,
      "learning_rate": 6.739588080803134e-06,
      "loss": 0.3563,
      "step": 2241
    },
    {
      "epoch": 0.0572,
      "grad_norm": 0.5108334422111511,
      "learning_rate": 6.736315077143095e-06,
      "loss": 0.3698,
      "step": 2242
    },
    {
      "epoch": 0.0574,
      "grad_norm": 0.4440063238143921,
      "learning_rate": 6.733041227224182e-06,
      "loss": 0.3089,
      "step": 2243
    },
    {
      "epoch": 0.0576,
      "grad_norm": 0.44452667236328125,
      "learning_rate": 6.729766532642024e-06,
      "loss": 0.3593,
      "step": 2244
    },
    {
      "epoch": 0.0578,
      "grad_norm": 0.47373735904693604,
      "learning_rate": 6.7264909949926735e-06,
      "loss": 0.317,
      "step": 2245
    },
    {
      "epoch": 0.058,
      "grad_norm": 0.43215399980545044,
      "learning_rate": 6.723214615872585e-06,
      "loss": 0.3005,
      "step": 2246
    },
    {
      "epoch": 0.0582,
      "grad_norm": 0.571315348148346,
      "learning_rate": 6.719937396878628e-06,
      "loss": 0.3417,
      "step": 2247
    },
    {
      "epoch": 0.0584,
      "grad_norm": 0.4681345820426941,
      "learning_rate": 6.716659339608077e-06,
      "loss": 0.3678,
      "step": 2248
    },
    {
      "epoch": 0.0586,
      "grad_norm": 0.5038642883300781,
      "learning_rate": 6.713380445658618e-06,
      "loss": 0.333,
      "step": 2249
    },
    {
      "epoch": 0.0588,
      "grad_norm": 0.5396400094032288,
      "learning_rate": 6.710100716628345e-06,
      "loss": 0.3523,
      "step": 2250
    },
    {
      "epoch": 0.059,
      "grad_norm": 0.5410488843917847,
      "learning_rate": 6.7068201541157555e-06,
      "loss": 0.3819,
      "step": 2251
    },
    {
      "epoch": 0.0592,
      "grad_norm": 0.894669234752655,
      "learning_rate": 6.70353875971976e-06,
      "loss": 0.3405,
      "step": 2252
    },
    {
      "epoch": 0.0594,
      "grad_norm": 0.4946800172328949,
      "learning_rate": 6.700256535039665e-06,
      "loss": 0.3714,
      "step": 2253
    },
    {
      "epoch": 0.0596,
      "grad_norm": 0.6367211937904358,
      "learning_rate": 6.6969734816751906e-06,
      "loss": 0.3646,
      "step": 2254
    },
    {
      "epoch": 0.0598,
      "grad_norm": 0.583281397819519,
      "learning_rate": 6.693689601226458e-06,
      "loss": 0.3831,
      "step": 2255
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6009190082550049,
      "learning_rate": 6.690404895293987e-06,
      "loss": 0.3396,
      "step": 2256
    },
    {
      "epoch": 0.0602,
      "grad_norm": 0.5110039710998535,
      "learning_rate": 6.687119365478707e-06,
      "loss": 0.3818,
      "step": 2257
    },
    {
      "epoch": 0.0604,
      "grad_norm": 0.49086347222328186,
      "learning_rate": 6.683833013381942e-06,
      "loss": 0.3624,
      "step": 2258
    },
    {
      "epoch": 0.0606,
      "grad_norm": 0.525911808013916,
      "learning_rate": 6.680545840605423e-06,
      "loss": 0.3444,
      "step": 2259
    },
    {
      "epoch": 0.0608,
      "grad_norm": 0.4058866798877716,
      "learning_rate": 6.677257848751276e-06,
      "loss": 0.3295,
      "step": 2260
    },
    {
      "epoch": 0.061,
      "grad_norm": 0.9800489544868469,
      "learning_rate": 6.673969039422029e-06,
      "loss": 0.3106,
      "step": 2261
    },
    {
      "epoch": 0.0612,
      "grad_norm": 0.4528243839740753,
      "learning_rate": 6.6706794142206085e-06,
      "loss": 0.3372,
      "step": 2262
    },
    {
      "epoch": 0.0614,
      "grad_norm": 0.46649426221847534,
      "learning_rate": 6.6673889747503364e-06,
      "loss": 0.3287,
      "step": 2263
    },
    {
      "epoch": 0.0616,
      "grad_norm": 0.6663564443588257,
      "learning_rate": 6.664097722614934e-06,
      "loss": 0.3465,
      "step": 2264
    },
    {
      "epoch": 0.0618,
      "grad_norm": 0.4502687454223633,
      "learning_rate": 6.6608056594185166e-06,
      "loss": 0.3429,
      "step": 2265
    },
    {
      "epoch": 0.062,
      "grad_norm": 0.5377664566040039,
      "learning_rate": 6.657512786765599e-06,
      "loss": 0.3356,
      "step": 2266
    },
    {
      "epoch": 0.0622,
      "grad_norm": 0.3694286048412323,
      "learning_rate": 6.654219106261082e-06,
      "loss": 0.3191,
      "step": 2267
    },
    {
      "epoch": 0.0624,
      "grad_norm": 0.440787136554718,
      "learning_rate": 6.6509246195102685e-06,
      "loss": 0.3496,
      "step": 2268
    },
    {
      "epoch": 0.0626,
      "grad_norm": 0.4365440905094147,
      "learning_rate": 6.647629328118852e-06,
      "loss": 0.2907,
      "step": 2269
    },
    {
      "epoch": 0.0628,
      "grad_norm": 0.5410246849060059,
      "learning_rate": 6.644333233692917e-06,
      "loss": 0.3198,
      "step": 2270
    },
    {
      "epoch": 0.063,
      "grad_norm": 0.4398348927497864,
      "learning_rate": 6.64103633783894e-06,
      "loss": 0.3369,
      "step": 2271
    },
    {
      "epoch": 0.0632,
      "grad_norm": 0.4404999911785126,
      "learning_rate": 6.637738642163785e-06,
      "loss": 0.3,
      "step": 2272
    },
    {
      "epoch": 0.0634,
      "grad_norm": 0.496121883392334,
      "learning_rate": 6.634440148274712e-06,
      "loss": 0.3247,
      "step": 2273
    },
    {
      "epoch": 0.0636,
      "grad_norm": 0.4267372190952301,
      "learning_rate": 6.631140857779368e-06,
      "loss": 0.3329,
      "step": 2274
    },
    {
      "epoch": 0.0638,
      "grad_norm": 0.5238857269287109,
      "learning_rate": 6.627840772285784e-06,
      "loss": 0.338,
      "step": 2275
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.6000864505767822,
      "learning_rate": 6.624539893402383e-06,
      "loss": 0.3572,
      "step": 2276
    },
    {
      "epoch": 0.0642,
      "grad_norm": 0.4456091821193695,
      "learning_rate": 6.6212382227379726e-06,
      "loss": 0.3631,
      "step": 2277
    },
    {
      "epoch": 0.0644,
      "grad_norm": 0.5112344622612,
      "learning_rate": 6.617935761901748e-06,
      "loss": 0.3963,
      "step": 2278
    },
    {
      "epoch": 0.0646,
      "grad_norm": 0.6333364248275757,
      "learning_rate": 6.614632512503289e-06,
      "loss": 0.2836,
      "step": 2279
    },
    {
      "epoch": 0.0648,
      "grad_norm": 0.4348183870315552,
      "learning_rate": 6.611328476152557e-06,
      "loss": 0.3216,
      "step": 2280
    },
    {
      "epoch": 0.065,
      "grad_norm": 0.4898277521133423,
      "learning_rate": 6.6080236544599e-06,
      "loss": 0.3806,
      "step": 2281
    },
    {
      "epoch": 0.0652,
      "grad_norm": 0.5011742115020752,
      "learning_rate": 6.604718049036047e-06,
      "loss": 0.3513,
      "step": 2282
    },
    {
      "epoch": 0.0654,
      "grad_norm": 0.4281587600708008,
      "learning_rate": 6.601411661492114e-06,
      "loss": 0.3694,
      "step": 2283
    },
    {
      "epoch": 0.0656,
      "grad_norm": 0.4806574583053589,
      "learning_rate": 6.59810449343959e-06,
      "loss": 0.3284,
      "step": 2284
    },
    {
      "epoch": 0.0658,
      "grad_norm": 0.4611220359802246,
      "learning_rate": 6.594796546490351e-06,
      "loss": 0.333,
      "step": 2285
    },
    {
      "epoch": 0.066,
      "grad_norm": 0.516805112361908,
      "learning_rate": 6.591487822256648e-06,
      "loss": 0.3284,
      "step": 2286
    },
    {
      "epoch": 0.0662,
      "grad_norm": 0.4474007189273834,
      "learning_rate": 6.588178322351113e-06,
      "loss": 0.3388,
      "step": 2287
    },
    {
      "epoch": 0.0664,
      "grad_norm": 0.4661369025707245,
      "learning_rate": 6.58486804838676e-06,
      "loss": 0.3359,
      "step": 2288
    },
    {
      "epoch": 0.0666,
      "grad_norm": 0.6331821084022522,
      "learning_rate": 6.58155700197697e-06,
      "loss": 0.3671,
      "step": 2289
    },
    {
      "epoch": 0.0668,
      "grad_norm": 0.43593156337738037,
      "learning_rate": 6.578245184735513e-06,
      "loss": 0.3165,
      "step": 2290
    },
    {
      "epoch": 0.067,
      "grad_norm": 0.4109969437122345,
      "learning_rate": 6.574932598276524e-06,
      "loss": 0.3407,
      "step": 2291
    },
    {
      "epoch": 0.0672,
      "grad_norm": 0.4015258252620697,
      "learning_rate": 6.571619244214521e-06,
      "loss": 0.3216,
      "step": 2292
    },
    {
      "epoch": 0.0674,
      "grad_norm": 0.4452706575393677,
      "learning_rate": 6.5683051241643894e-06,
      "loss": 0.3534,
      "step": 2293
    },
    {
      "epoch": 0.0676,
      "grad_norm": 0.573607325553894,
      "learning_rate": 6.5649902397413915e-06,
      "loss": 0.3706,
      "step": 2294
    },
    {
      "epoch": 0.0678,
      "grad_norm": 0.5733379125595093,
      "learning_rate": 6.561674592561164e-06,
      "loss": 0.3209,
      "step": 2295
    },
    {
      "epoch": 0.068,
      "grad_norm": 0.542415976524353,
      "learning_rate": 6.558358184239709e-06,
      "loss": 0.3303,
      "step": 2296
    },
    {
      "epoch": 0.0682,
      "grad_norm": 0.531074047088623,
      "learning_rate": 6.55504101639341e-06,
      "loss": 0.3421,
      "step": 2297
    },
    {
      "epoch": 0.0684,
      "grad_norm": 0.5372994542121887,
      "learning_rate": 6.551723090639008e-06,
      "loss": 0.333,
      "step": 2298
    },
    {
      "epoch": 0.0686,
      "grad_norm": 0.469684362411499,
      "learning_rate": 6.548404408593622e-06,
      "loss": 0.322,
      "step": 2299
    },
    {
      "epoch": 0.0688,
      "grad_norm": 0.5271499752998352,
      "learning_rate": 6.545084971874738e-06,
      "loss": 0.3295,
      "step": 2300
    },
    {
      "epoch": 0.069,
      "grad_norm": 0.5359232425689697,
      "learning_rate": 6.541764782100208e-06,
      "loss": 0.338,
      "step": 2301
    },
    {
      "epoch": 0.0692,
      "grad_norm": 0.5083122849464417,
      "learning_rate": 6.538443840888254e-06,
      "loss": 0.3744,
      "step": 2302
    },
    {
      "epoch": 0.0694,
      "grad_norm": 0.5099042654037476,
      "learning_rate": 6.53512214985746e-06,
      "loss": 0.3431,
      "step": 2303
    },
    {
      "epoch": 0.0696,
      "grad_norm": 0.43436524271965027,
      "learning_rate": 6.53179971062678e-06,
      "loss": 0.3048,
      "step": 2304
    },
    {
      "epoch": 0.0698,
      "grad_norm": 0.5373983383178711,
      "learning_rate": 6.5284765248155295e-06,
      "loss": 0.3218,
      "step": 2305
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.42452308535575867,
      "learning_rate": 6.525152594043389e-06,
      "loss": 0.3295,
      "step": 2306
    },
    {
      "epoch": 0.0702,
      "grad_norm": 0.4913865923881531,
      "learning_rate": 6.5218279199304014e-06,
      "loss": 0.3514,
      "step": 2307
    },
    {
      "epoch": 0.0704,
      "grad_norm": 0.4042423367500305,
      "learning_rate": 6.518502504096972e-06,
      "loss": 0.3414,
      "step": 2308
    },
    {
      "epoch": 0.0706,
      "grad_norm": 0.5422516465187073,
      "learning_rate": 6.5151763481638705e-06,
      "loss": 0.3768,
      "step": 2309
    },
    {
      "epoch": 0.0708,
      "grad_norm": 0.7036200165748596,
      "learning_rate": 6.5118494537522235e-06,
      "loss": 0.3158,
      "step": 2310
    },
    {
      "epoch": 0.071,
      "grad_norm": 0.4788452088832855,
      "learning_rate": 6.508521822483518e-06,
      "loss": 0.3154,
      "step": 2311
    },
    {
      "epoch": 0.0712,
      "grad_norm": 0.4812452495098114,
      "learning_rate": 6.505193455979603e-06,
      "loss": 0.3493,
      "step": 2312
    },
    {
      "epoch": 0.0714,
      "grad_norm": 0.5330455303192139,
      "learning_rate": 6.501864355862682e-06,
      "loss": 0.3458,
      "step": 2313
    },
    {
      "epoch": 0.0716,
      "grad_norm": 0.4141874611377716,
      "learning_rate": 6.49853452375532e-06,
      "loss": 0.3378,
      "step": 2314
    },
    {
      "epoch": 0.0718,
      "grad_norm": 0.5108392834663391,
      "learning_rate": 6.495203961280434e-06,
      "loss": 0.3413,
      "step": 2315
    },
    {
      "epoch": 0.072,
      "grad_norm": 0.3845902383327484,
      "learning_rate": 6.491872670061302e-06,
      "loss": 0.3073,
      "step": 2316
    },
    {
      "epoch": 0.0722,
      "grad_norm": 0.43255946040153503,
      "learning_rate": 6.4885406517215535e-06,
      "loss": 0.3444,
      "step": 2317
    },
    {
      "epoch": 0.0724,
      "grad_norm": 0.4763522446155548,
      "learning_rate": 6.485207907885175e-06,
      "loss": 0.349,
      "step": 2318
    },
    {
      "epoch": 0.0726,
      "grad_norm": 0.4820054769515991,
      "learning_rate": 6.481874440176506e-06,
      "loss": 0.3346,
      "step": 2319
    },
    {
      "epoch": 0.0728,
      "grad_norm": 0.9348790645599365,
      "learning_rate": 6.4785402502202345e-06,
      "loss": 0.3308,
      "step": 2320
    },
    {
      "epoch": 0.073,
      "grad_norm": 0.5042381286621094,
      "learning_rate": 6.4752053396414075e-06,
      "loss": 0.3646,
      "step": 2321
    },
    {
      "epoch": 0.0732,
      "grad_norm": 0.45196253061294556,
      "learning_rate": 6.471869710065418e-06,
      "loss": 0.358,
      "step": 2322
    },
    {
      "epoch": 0.0734,
      "grad_norm": 0.46413469314575195,
      "learning_rate": 6.4685333631180145e-06,
      "loss": 0.3423,
      "step": 2323
    },
    {
      "epoch": 0.0736,
      "grad_norm": 0.5222298502922058,
      "learning_rate": 6.465196300425287e-06,
      "loss": 0.3427,
      "step": 2324
    },
    {
      "epoch": 0.0738,
      "grad_norm": 0.43250784277915955,
      "learning_rate": 6.461858523613684e-06,
      "loss": 0.3498,
      "step": 2325
    },
    {
      "epoch": 0.074,
      "grad_norm": 0.5217347741127014,
      "learning_rate": 6.458520034309995e-06,
      "loss": 0.3607,
      "step": 2326
    },
    {
      "epoch": 0.0742,
      "grad_norm": 0.406806081533432,
      "learning_rate": 6.455180834141359e-06,
      "loss": 0.3322,
      "step": 2327
    },
    {
      "epoch": 0.0744,
      "grad_norm": 0.46727097034454346,
      "learning_rate": 6.451840924735264e-06,
      "loss": 0.3237,
      "step": 2328
    },
    {
      "epoch": 0.0746,
      "grad_norm": 0.5123676657676697,
      "learning_rate": 6.448500307719537e-06,
      "loss": 0.3484,
      "step": 2329
    },
    {
      "epoch": 0.0748,
      "grad_norm": 0.402761846780777,
      "learning_rate": 6.445158984722358e-06,
      "loss": 0.3248,
      "step": 2330
    },
    {
      "epoch": 0.075,
      "grad_norm": 0.5218918919563293,
      "learning_rate": 6.441816957372247e-06,
      "loss": 0.3363,
      "step": 2331
    },
    {
      "epoch": 0.0752,
      "grad_norm": 0.4100588262081146,
      "learning_rate": 6.438474227298065e-06,
      "loss": 0.3399,
      "step": 2332
    },
    {
      "epoch": 0.0754,
      "grad_norm": 0.5548402070999146,
      "learning_rate": 6.435130796129019e-06,
      "loss": 0.3216,
      "step": 2333
    },
    {
      "epoch": 0.0756,
      "grad_norm": 0.590120792388916,
      "learning_rate": 6.431786665494657e-06,
      "loss": 0.3669,
      "step": 2334
    },
    {
      "epoch": 0.0758,
      "grad_norm": 0.40840840339660645,
      "learning_rate": 6.428441837024868e-06,
      "loss": 0.3429,
      "step": 2335
    },
    {
      "epoch": 0.076,
      "grad_norm": 0.5337225198745728,
      "learning_rate": 6.425096312349881e-06,
      "loss": 0.3589,
      "step": 2336
    },
    {
      "epoch": 0.0762,
      "grad_norm": 0.6463695764541626,
      "learning_rate": 6.421750093100264e-06,
      "loss": 0.3683,
      "step": 2337
    },
    {
      "epoch": 0.0764,
      "grad_norm": 0.43012523651123047,
      "learning_rate": 6.418403180906923e-06,
      "loss": 0.3151,
      "step": 2338
    },
    {
      "epoch": 0.0766,
      "grad_norm": 0.44772282242774963,
      "learning_rate": 6.415055577401101e-06,
      "loss": 0.3376,
      "step": 2339
    },
    {
      "epoch": 0.0768,
      "grad_norm": 0.4547172784805298,
      "learning_rate": 6.411707284214384e-06,
      "loss": 0.3306,
      "step": 2340
    },
    {
      "epoch": 0.077,
      "grad_norm": 1.2394989728927612,
      "learning_rate": 6.408358302978683e-06,
      "loss": 0.3634,
      "step": 2341
    },
    {
      "epoch": 0.0772,
      "grad_norm": 0.5328880548477173,
      "learning_rate": 6.4050086353262565e-06,
      "loss": 0.337,
      "step": 2342
    },
    {
      "epoch": 0.0774,
      "grad_norm": 0.4310884475708008,
      "learning_rate": 6.401658282889689e-06,
      "loss": 0.3408,
      "step": 2343
    },
    {
      "epoch": 0.0776,
      "grad_norm": 0.5393918752670288,
      "learning_rate": 6.3983072473019e-06,
      "loss": 0.3219,
      "step": 2344
    },
    {
      "epoch": 0.0778,
      "grad_norm": 0.42995643615722656,
      "learning_rate": 6.3949555301961474e-06,
      "loss": 0.3322,
      "step": 2345
    },
    {
      "epoch": 0.078,
      "grad_norm": 0.440855473279953,
      "learning_rate": 6.391603133206015e-06,
      "loss": 0.3281,
      "step": 2346
    },
    {
      "epoch": 0.0782,
      "grad_norm": 0.4727974534034729,
      "learning_rate": 6.388250057965421e-06,
      "loss": 0.339,
      "step": 2347
    },
    {
      "epoch": 0.0784,
      "grad_norm": 0.49236348271369934,
      "learning_rate": 6.384896306108612e-06,
      "loss": 0.3417,
      "step": 2348
    },
    {
      "epoch": 0.0786,
      "grad_norm": 0.477784663438797,
      "learning_rate": 6.3815418792701686e-06,
      "loss": 0.3663,
      "step": 2349
    },
    {
      "epoch": 0.0788,
      "grad_norm": 0.518724262714386,
      "learning_rate": 6.378186779084996e-06,
      "loss": 0.3508,
      "step": 2350
    },
    {
      "epoch": 0.079,
      "grad_norm": 0.4606408178806305,
      "learning_rate": 6.374831007188331e-06,
      "loss": 0.3753,
      "step": 2351
    },
    {
      "epoch": 0.0792,
      "grad_norm": 0.46507635712623596,
      "learning_rate": 6.371474565215734e-06,
      "loss": 0.3408,
      "step": 2352
    },
    {
      "epoch": 0.0794,
      "grad_norm": 0.5252976417541504,
      "learning_rate": 6.368117454803093e-06,
      "loss": 0.3305,
      "step": 2353
    },
    {
      "epoch": 0.0796,
      "grad_norm": 0.6039084196090698,
      "learning_rate": 6.364759677586627e-06,
      "loss": 0.3322,
      "step": 2354
    },
    {
      "epoch": 0.0798,
      "grad_norm": 0.5714782476425171,
      "learning_rate": 6.361401235202872e-06,
      "loss": 0.3452,
      "step": 2355
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5207638740539551,
      "learning_rate": 6.358042129288694e-06,
      "loss": 0.3559,
      "step": 2356
    },
    {
      "epoch": 0.0802,
      "grad_norm": 0.4567708373069763,
      "learning_rate": 6.35468236148128e-06,
      "loss": 0.3423,
      "step": 2357
    },
    {
      "epoch": 0.0804,
      "grad_norm": 0.5505416393280029,
      "learning_rate": 6.35132193341814e-06,
      "loss": 0.3417,
      "step": 2358
    },
    {
      "epoch": 0.0806,
      "grad_norm": 0.5091280341148376,
      "learning_rate": 6.3479608467371055e-06,
      "loss": 0.3386,
      "step": 2359
    },
    {
      "epoch": 0.0808,
      "grad_norm": 0.41097936034202576,
      "learning_rate": 6.344599103076329e-06,
      "loss": 0.3018,
      "step": 2360
    },
    {
      "epoch": 0.081,
      "grad_norm": 0.5598363876342773,
      "learning_rate": 6.341236704074285e-06,
      "loss": 0.3192,
      "step": 2361
    },
    {
      "epoch": 0.0812,
      "grad_norm": 0.5451776385307312,
      "learning_rate": 6.337873651369764e-06,
      "loss": 0.3492,
      "step": 2362
    },
    {
      "epoch": 0.0814,
      "grad_norm": 0.5237850546836853,
      "learning_rate": 6.334509946601879e-06,
      "loss": 0.3177,
      "step": 2363
    },
    {
      "epoch": 0.0816,
      "grad_norm": 0.6212160587310791,
      "learning_rate": 6.331145591410057e-06,
      "loss": 0.3557,
      "step": 2364
    },
    {
      "epoch": 0.0818,
      "grad_norm": 0.5246014595031738,
      "learning_rate": 6.327780587434045e-06,
      "loss": 0.3252,
      "step": 2365
    },
    {
      "epoch": 0.082,
      "grad_norm": 0.6221247911453247,
      "learning_rate": 6.324414936313904e-06,
      "loss": 0.3547,
      "step": 2366
    },
    {
      "epoch": 0.0822,
      "grad_norm": 0.5106465816497803,
      "learning_rate": 6.321048639690013e-06,
      "loss": 0.3727,
      "step": 2367
    },
    {
      "epoch": 0.0824,
      "grad_norm": 0.523582398891449,
      "learning_rate": 6.317681699203065e-06,
      "loss": 0.3511,
      "step": 2368
    },
    {
      "epoch": 0.0826,
      "grad_norm": 0.5688356757164001,
      "learning_rate": 6.314314116494061e-06,
      "loss": 0.3291,
      "step": 2369
    },
    {
      "epoch": 0.0828,
      "grad_norm": 0.4489540755748749,
      "learning_rate": 6.310945893204324e-06,
      "loss": 0.3276,
      "step": 2370
    },
    {
      "epoch": 0.083,
      "grad_norm": 0.5527641177177429,
      "learning_rate": 6.307577030975485e-06,
      "loss": 0.3,
      "step": 2371
    },
    {
      "epoch": 0.0832,
      "grad_norm": 0.46056830883026123,
      "learning_rate": 6.304207531449486e-06,
      "loss": 0.3561,
      "step": 2372
    },
    {
      "epoch": 0.0834,
      "grad_norm": 1.069359540939331,
      "learning_rate": 6.3008373962685785e-06,
      "loss": 0.3233,
      "step": 2373
    },
    {
      "epoch": 0.0836,
      "grad_norm": 0.4840530455112457,
      "learning_rate": 6.297466627075327e-06,
      "loss": 0.3471,
      "step": 2374
    },
    {
      "epoch": 0.0838,
      "grad_norm": 0.4515450894832611,
      "learning_rate": 6.294095225512604e-06,
      "loss": 0.3243,
      "step": 2375
    },
    {
      "epoch": 0.084,
      "grad_norm": 0.705654501914978,
      "learning_rate": 6.290723193223589e-06,
      "loss": 0.3247,
      "step": 2376
    },
    {
      "epoch": 0.0842,
      "grad_norm": 0.4913703203201294,
      "learning_rate": 6.28735053185177e-06,
      "loss": 0.3734,
      "step": 2377
    },
    {
      "epoch": 0.0844,
      "grad_norm": 0.5745387077331543,
      "learning_rate": 6.28397724304094e-06,
      "loss": 0.3408,
      "step": 2378
    },
    {
      "epoch": 0.0846,
      "grad_norm": 0.5778160095214844,
      "learning_rate": 6.280603328435199e-06,
      "loss": 0.3301,
      "step": 2379
    },
    {
      "epoch": 0.0848,
      "grad_norm": 0.43054255843162537,
      "learning_rate": 6.277228789678953e-06,
      "loss": 0.3323,
      "step": 2380
    },
    {
      "epoch": 0.085,
      "grad_norm": 0.4482230246067047,
      "learning_rate": 6.273853628416911e-06,
      "loss": 0.3327,
      "step": 2381
    },
    {
      "epoch": 0.0852,
      "grad_norm": 0.4342125952243805,
      "learning_rate": 6.270477846294086e-06,
      "loss": 0.3251,
      "step": 2382
    },
    {
      "epoch": 0.0854,
      "grad_norm": 0.6504151225090027,
      "learning_rate": 6.267101444955792e-06,
      "loss": 0.3355,
      "step": 2383
    },
    {
      "epoch": 0.0856,
      "grad_norm": 0.4461098909378052,
      "learning_rate": 6.2637244260476474e-06,
      "loss": 0.3046,
      "step": 2384
    },
    {
      "epoch": 0.0858,
      "grad_norm": 0.5992279052734375,
      "learning_rate": 6.26034679121557e-06,
      "loss": 0.3337,
      "step": 2385
    },
    {
      "epoch": 0.086,
      "grad_norm": 0.42221707105636597,
      "learning_rate": 6.256968542105775e-06,
      "loss": 0.3248,
      "step": 2386
    },
    {
      "epoch": 0.0862,
      "grad_norm": 0.7462438344955444,
      "learning_rate": 6.2535896803647845e-06,
      "loss": 0.3711,
      "step": 2387
    },
    {
      "epoch": 0.0864,
      "grad_norm": 0.4538249373435974,
      "learning_rate": 6.250210207639411e-06,
      "loss": 0.3257,
      "step": 2388
    },
    {
      "epoch": 0.0866,
      "grad_norm": 0.5752971172332764,
      "learning_rate": 6.24683012557677e-06,
      "loss": 0.3745,
      "step": 2389
    },
    {
      "epoch": 0.0868,
      "grad_norm": 0.4418807029724121,
      "learning_rate": 6.243449435824276e-06,
      "loss": 0.3348,
      "step": 2390
    },
    {
      "epoch": 0.087,
      "grad_norm": 0.5001590251922607,
      "learning_rate": 6.240068140029628e-06,
      "loss": 0.3667,
      "step": 2391
    },
    {
      "epoch": 0.0872,
      "grad_norm": 0.5094300508499146,
      "learning_rate": 6.236686239840836e-06,
      "loss": 0.3296,
      "step": 2392
    },
    {
      "epoch": 0.0874,
      "grad_norm": 0.4087652862071991,
      "learning_rate": 6.233303736906193e-06,
      "loss": 0.3321,
      "step": 2393
    },
    {
      "epoch": 0.0876,
      "grad_norm": 0.4807608127593994,
      "learning_rate": 6.229920632874291e-06,
      "loss": 0.3403,
      "step": 2394
    },
    {
      "epoch": 0.0878,
      "grad_norm": 0.43279340863227844,
      "learning_rate": 6.2265369293940135e-06,
      "loss": 0.3334,
      "step": 2395
    },
    {
      "epoch": 0.088,
      "grad_norm": 0.5101553201675415,
      "learning_rate": 6.223152628114537e-06,
      "loss": 0.3462,
      "step": 2396
    },
    {
      "epoch": 0.0882,
      "grad_norm": 0.4890720546245575,
      "learning_rate": 6.219767730685329e-06,
      "loss": 0.3351,
      "step": 2397
    },
    {
      "epoch": 0.0884,
      "grad_norm": 0.5379849076271057,
      "learning_rate": 6.216382238756147e-06,
      "loss": 0.3763,
      "step": 2398
    },
    {
      "epoch": 0.0886,
      "grad_norm": 0.5037993788719177,
      "learning_rate": 6.212996153977038e-06,
      "loss": 0.3096,
      "step": 2399
    },
    {
      "epoch": 0.0888,
      "grad_norm": 0.46747565269470215,
      "learning_rate": 6.209609477998339e-06,
      "loss": 0.3288,
      "step": 2400
    },
    {
      "epoch": 0.089,
      "grad_norm": 0.8250702619552612,
      "learning_rate": 6.206222212470675e-06,
      "loss": 0.331,
      "step": 2401
    },
    {
      "epoch": 0.0892,
      "grad_norm": 0.5429006218910217,
      "learning_rate": 6.202834359044959e-06,
      "loss": 0.3533,
      "step": 2402
    },
    {
      "epoch": 0.0894,
      "grad_norm": 0.49656200408935547,
      "learning_rate": 6.199445919372388e-06,
      "loss": 0.3325,
      "step": 2403
    },
    {
      "epoch": 0.0896,
      "grad_norm": 0.4582713842391968,
      "learning_rate": 6.1960568951044475e-06,
      "loss": 0.3289,
      "step": 2404
    },
    {
      "epoch": 0.0898,
      "grad_norm": 0.47378668189048767,
      "learning_rate": 6.192667287892905e-06,
      "loss": 0.3517,
      "step": 2405
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5077605843544006,
      "learning_rate": 6.189277099389816e-06,
      "loss": 0.359,
      "step": 2406
    },
    {
      "epoch": 0.0902,
      "grad_norm": 0.482529878616333,
      "learning_rate": 6.185886331247516e-06,
      "loss": 0.3507,
      "step": 2407
    },
    {
      "epoch": 0.0904,
      "grad_norm": 0.42422056198120117,
      "learning_rate": 6.182494985118625e-06,
      "loss": 0.339,
      "step": 2408
    },
    {
      "epoch": 0.0906,
      "grad_norm": 0.43958616256713867,
      "learning_rate": 6.179103062656042e-06,
      "loss": 0.3135,
      "step": 2409
    },
    {
      "epoch": 0.0908,
      "grad_norm": 0.39092352986335754,
      "learning_rate": 6.17571056551295e-06,
      "loss": 0.346,
      "step": 2410
    },
    {
      "epoch": 0.091,
      "grad_norm": 0.5925837159156799,
      "learning_rate": 6.172317495342812e-06,
      "loss": 0.3346,
      "step": 2411
    },
    {
      "epoch": 0.0912,
      "grad_norm": 0.49113696813583374,
      "learning_rate": 6.168923853799369e-06,
      "loss": 0.3453,
      "step": 2412
    },
    {
      "epoch": 0.0914,
      "grad_norm": 0.5323814749717712,
      "learning_rate": 6.16552964253664e-06,
      "loss": 0.3829,
      "step": 2413
    },
    {
      "epoch": 0.0916,
      "grad_norm": 0.41340434551239014,
      "learning_rate": 6.1621348632089205e-06,
      "loss": 0.3411,
      "step": 2414
    },
    {
      "epoch": 0.0918,
      "grad_norm": 0.5421742796897888,
      "learning_rate": 6.158739517470786e-06,
      "loss": 0.3477,
      "step": 2415
    },
    {
      "epoch": 0.092,
      "grad_norm": 0.44344010949134827,
      "learning_rate": 6.155343606977091e-06,
      "loss": 0.3123,
      "step": 2416
    },
    {
      "epoch": 0.0922,
      "grad_norm": 0.48665133118629456,
      "learning_rate": 6.151947133382954e-06,
      "loss": 0.311,
      "step": 2417
    },
    {
      "epoch": 0.0924,
      "grad_norm": 0.4339523911476135,
      "learning_rate": 6.148550098343778e-06,
      "loss": 0.3553,
      "step": 2418
    },
    {
      "epoch": 0.0926,
      "grad_norm": 0.47434356808662415,
      "learning_rate": 6.145152503515239e-06,
      "loss": 0.3576,
      "step": 2419
    },
    {
      "epoch": 0.0928,
      "grad_norm": 0.49112415313720703,
      "learning_rate": 6.141754350553279e-06,
      "loss": 0.3516,
      "step": 2420
    },
    {
      "epoch": 0.093,
      "grad_norm": 0.40841159224510193,
      "learning_rate": 6.138355641114121e-06,
      "loss": 0.3315,
      "step": 2421
    },
    {
      "epoch": 0.0932,
      "grad_norm": 0.4433099627494812,
      "learning_rate": 6.134956376854251e-06,
      "loss": 0.3306,
      "step": 2422
    },
    {
      "epoch": 0.0934,
      "grad_norm": 0.8346061706542969,
      "learning_rate": 6.13155655943043e-06,
      "loss": 0.3278,
      "step": 2423
    },
    {
      "epoch": 0.0936,
      "grad_norm": 0.4595322608947754,
      "learning_rate": 6.128156190499688e-06,
      "loss": 0.3345,
      "step": 2424
    },
    {
      "epoch": 0.0938,
      "grad_norm": 0.5073685050010681,
      "learning_rate": 6.124755271719326e-06,
      "loss": 0.3434,
      "step": 2425
    },
    {
      "epoch": 0.094,
      "grad_norm": 0.45685386657714844,
      "learning_rate": 6.121353804746907e-06,
      "loss": 0.3527,
      "step": 2426
    },
    {
      "epoch": 0.0942,
      "grad_norm": 0.44121450185775757,
      "learning_rate": 6.117951791240265e-06,
      "loss": 0.3664,
      "step": 2427
    },
    {
      "epoch": 0.0944,
      "grad_norm": 0.46706753969192505,
      "learning_rate": 6.114549232857503e-06,
      "loss": 0.3452,
      "step": 2428
    },
    {
      "epoch": 0.0946,
      "grad_norm": 0.40212535858154297,
      "learning_rate": 6.111146131256983e-06,
      "loss": 0.3277,
      "step": 2429
    },
    {
      "epoch": 0.0948,
      "grad_norm": 0.43506643176078796,
      "learning_rate": 6.107742488097338e-06,
      "loss": 0.3104,
      "step": 2430
    },
    {
      "epoch": 0.095,
      "grad_norm": 0.48888474702835083,
      "learning_rate": 6.10433830503746e-06,
      "loss": 0.3503,
      "step": 2431
    },
    {
      "epoch": 0.0952,
      "grad_norm": 0.42376717925071716,
      "learning_rate": 6.100933583736508e-06,
      "loss": 0.3222,
      "step": 2432
    },
    {
      "epoch": 0.0954,
      "grad_norm": 0.467842698097229,
      "learning_rate": 6.097528325853903e-06,
      "loss": 0.3559,
      "step": 2433
    },
    {
      "epoch": 0.0956,
      "grad_norm": 0.4767381548881531,
      "learning_rate": 6.094122533049324e-06,
      "loss": 0.3616,
      "step": 2434
    },
    {
      "epoch": 0.0958,
      "grad_norm": 0.4524749219417572,
      "learning_rate": 6.090716206982714e-06,
      "loss": 0.3272,
      "step": 2435
    },
    {
      "epoch": 0.096,
      "grad_norm": 0.4882054626941681,
      "learning_rate": 6.087309349314275e-06,
      "loss": 0.345,
      "step": 2436
    },
    {
      "epoch": 0.0962,
      "grad_norm": 0.44369685649871826,
      "learning_rate": 6.083901961704467e-06,
      "loss": 0.3064,
      "step": 2437
    },
    {
      "epoch": 0.0964,
      "grad_norm": 0.40718352794647217,
      "learning_rate": 6.080494045814011e-06,
      "loss": 0.2909,
      "step": 2438
    },
    {
      "epoch": 0.0966,
      "grad_norm": 0.4233592748641968,
      "learning_rate": 6.077085603303883e-06,
      "loss": 0.3061,
      "step": 2439
    },
    {
      "epoch": 0.0968,
      "grad_norm": 0.5627378821372986,
      "learning_rate": 6.073676635835317e-06,
      "loss": 0.3379,
      "step": 2440
    },
    {
      "epoch": 0.097,
      "grad_norm": 0.5162911415100098,
      "learning_rate": 6.0702671450698005e-06,
      "loss": 0.3802,
      "step": 2441
    },
    {
      "epoch": 0.0972,
      "grad_norm": 0.5340176820755005,
      "learning_rate": 6.066857132669081e-06,
      "loss": 0.3412,
      "step": 2442
    },
    {
      "epoch": 0.0974,
      "grad_norm": 0.4086494743824005,
      "learning_rate": 6.0634466002951545e-06,
      "loss": 0.293,
      "step": 2443
    },
    {
      "epoch": 0.0976,
      "grad_norm": 0.432241827249527,
      "learning_rate": 6.060035549610275e-06,
      "loss": 0.3356,
      "step": 2444
    },
    {
      "epoch": 0.0978,
      "grad_norm": 0.4954981505870819,
      "learning_rate": 6.056623982276945e-06,
      "loss": 0.3417,
      "step": 2445
    },
    {
      "epoch": 0.098,
      "grad_norm": 0.47506067156791687,
      "learning_rate": 6.0532118999579206e-06,
      "loss": 0.357,
      "step": 2446
    },
    {
      "epoch": 0.0982,
      "grad_norm": 0.6003977656364441,
      "learning_rate": 6.049799304316214e-06,
      "loss": 0.3324,
      "step": 2447
    },
    {
      "epoch": 0.0984,
      "grad_norm": 0.5059877038002014,
      "learning_rate": 6.046386197015076e-06,
      "loss": 0.32,
      "step": 2448
    },
    {
      "epoch": 0.0986,
      "grad_norm": 0.520940899848938,
      "learning_rate": 6.04297257971802e-06,
      "loss": 0.3874,
      "step": 2449
    },
    {
      "epoch": 0.0988,
      "grad_norm": 0.44793498516082764,
      "learning_rate": 6.039558454088796e-06,
      "loss": 0.3129,
      "step": 2450
    },
    {
      "epoch": 0.099,
      "grad_norm": 0.5410467982292175,
      "learning_rate": 6.036143821791413e-06,
      "loss": 0.3572,
      "step": 2451
    },
    {
      "epoch": 0.0992,
      "grad_norm": 0.4920574724674225,
      "learning_rate": 6.032728684490118e-06,
      "loss": 0.3179,
      "step": 2452
    },
    {
      "epoch": 0.0994,
      "grad_norm": 0.49657419323921204,
      "learning_rate": 6.029313043849407e-06,
      "loss": 0.358,
      "step": 2453
    },
    {
      "epoch": 0.0996,
      "grad_norm": 0.4258445203304291,
      "learning_rate": 6.025896901534023e-06,
      "loss": 0.3461,
      "step": 2454
    },
    {
      "epoch": 0.0998,
      "grad_norm": 0.5048337578773499,
      "learning_rate": 6.022480259208951e-06,
      "loss": 0.3561,
      "step": 2455
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4678512513637543,
      "learning_rate": 6.019063118539425e-06,
      "loss": 0.3229,
      "step": 2456
    },
    {
      "epoch": 0.1002,
      "grad_norm": 0.43667295575141907,
      "learning_rate": 6.015645481190912e-06,
      "loss": 0.3361,
      "step": 2457
    },
    {
      "epoch": 0.1004,
      "grad_norm": 0.46674636006355286,
      "learning_rate": 6.01222734882913e-06,
      "loss": 0.3423,
      "step": 2458
    },
    {
      "epoch": 0.1006,
      "grad_norm": 0.5132551193237305,
      "learning_rate": 6.008808723120035e-06,
      "loss": 0.343,
      "step": 2459
    },
    {
      "epoch": 0.1008,
      "grad_norm": 0.4936320185661316,
      "learning_rate": 6.005389605729824e-06,
      "loss": 0.3224,
      "step": 2460
    },
    {
      "epoch": 0.101,
      "grad_norm": 0.39796072244644165,
      "learning_rate": 6.001969998324932e-06,
      "loss": 0.3135,
      "step": 2461
    },
    {
      "epoch": 0.1012,
      "grad_norm": 0.5454707741737366,
      "learning_rate": 5.9985499025720354e-06,
      "loss": 0.3242,
      "step": 2462
    },
    {
      "epoch": 0.1014,
      "grad_norm": 0.508550763130188,
      "learning_rate": 5.995129320138047e-06,
      "loss": 0.3515,
      "step": 2463
    },
    {
      "epoch": 0.1016,
      "grad_norm": 0.5263989567756653,
      "learning_rate": 5.991708252690117e-06,
      "loss": 0.3459,
      "step": 2464
    },
    {
      "epoch": 0.1018,
      "grad_norm": 0.6493260860443115,
      "learning_rate": 5.988286701895631e-06,
      "loss": 0.343,
      "step": 2465
    },
    {
      "epoch": 0.102,
      "grad_norm": 0.4754670262336731,
      "learning_rate": 5.984864669422214e-06,
      "loss": 0.3131,
      "step": 2466
    },
    {
      "epoch": 0.1022,
      "grad_norm": 0.8512911796569824,
      "learning_rate": 5.98144215693772e-06,
      "loss": 0.3279,
      "step": 2467
    },
    {
      "epoch": 0.1024,
      "grad_norm": 0.41736143827438354,
      "learning_rate": 5.978019166110242e-06,
      "loss": 0.3213,
      "step": 2468
    },
    {
      "epoch": 0.1026,
      "grad_norm": 0.48705369234085083,
      "learning_rate": 5.974595698608103e-06,
      "loss": 0.3272,
      "step": 2469
    },
    {
      "epoch": 0.1028,
      "grad_norm": 0.4329937696456909,
      "learning_rate": 5.97117175609986e-06,
      "loss": 0.3383,
      "step": 2470
    },
    {
      "epoch": 0.103,
      "grad_norm": 0.4134720265865326,
      "learning_rate": 5.967747340254303e-06,
      "loss": 0.359,
      "step": 2471
    },
    {
      "epoch": 0.1032,
      "grad_norm": 0.4166122078895569,
      "learning_rate": 5.964322452740445e-06,
      "loss": 0.3513,
      "step": 2472
    },
    {
      "epoch": 0.1034,
      "grad_norm": 0.7419192790985107,
      "learning_rate": 5.960897095227541e-06,
      "loss": 0.3516,
      "step": 2473
    },
    {
      "epoch": 0.1036,
      "grad_norm": 0.5038263201713562,
      "learning_rate": 5.957471269385065e-06,
      "loss": 0.3021,
      "step": 2474
    },
    {
      "epoch": 0.1038,
      "grad_norm": 0.4536159932613373,
      "learning_rate": 5.954044976882725e-06,
      "loss": 0.2982,
      "step": 2475
    },
    {
      "epoch": 0.104,
      "grad_norm": 0.4079515337944031,
      "learning_rate": 5.950618219390451e-06,
      "loss": 0.3172,
      "step": 2476
    },
    {
      "epoch": 0.1042,
      "grad_norm": 0.5302231311798096,
      "learning_rate": 5.947190998578407e-06,
      "loss": 0.35,
      "step": 2477
    },
    {
      "epoch": 0.1044,
      "grad_norm": 0.3868292570114136,
      "learning_rate": 5.943763316116977e-06,
      "loss": 0.3185,
      "step": 2478
    },
    {
      "epoch": 0.1046,
      "grad_norm": 0.44130852818489075,
      "learning_rate": 5.94033517367677e-06,
      "loss": 0.3277,
      "step": 2479
    },
    {
      "epoch": 0.1048,
      "grad_norm": 0.5903106331825256,
      "learning_rate": 5.936906572928625e-06,
      "loss": 0.3284,
      "step": 2480
    },
    {
      "epoch": 0.105,
      "grad_norm": 0.4635830819606781,
      "learning_rate": 5.933477515543595e-06,
      "loss": 0.3268,
      "step": 2481
    },
    {
      "epoch": 0.1052,
      "grad_norm": 0.4567718505859375,
      "learning_rate": 5.930048003192965e-06,
      "loss": 0.3507,
      "step": 2482
    },
    {
      "epoch": 0.1054,
      "grad_norm": 0.6196195483207703,
      "learning_rate": 5.926618037548237e-06,
      "loss": 0.3522,
      "step": 2483
    },
    {
      "epoch": 0.1056,
      "grad_norm": 0.4900142550468445,
      "learning_rate": 5.923187620281135e-06,
      "loss": 0.3359,
      "step": 2484
    },
    {
      "epoch": 0.1058,
      "grad_norm": 0.5153523683547974,
      "learning_rate": 5.919756753063601e-06,
      "loss": 0.3544,
      "step": 2485
    },
    {
      "epoch": 0.106,
      "grad_norm": 0.43452414870262146,
      "learning_rate": 5.9163254375677995e-06,
      "loss": 0.3309,
      "step": 2486
    },
    {
      "epoch": 0.1062,
      "grad_norm": 0.564616858959198,
      "learning_rate": 5.912893675466112e-06,
      "loss": 0.3656,
      "step": 2487
    },
    {
      "epoch": 0.1064,
      "grad_norm": 0.4714284837245941,
      "learning_rate": 5.909461468431135e-06,
      "loss": 0.3563,
      "step": 2488
    },
    {
      "epoch": 0.1066,
      "grad_norm": 0.4320031702518463,
      "learning_rate": 5.906028818135687e-06,
      "loss": 0.3187,
      "step": 2489
    },
    {
      "epoch": 0.1068,
      "grad_norm": 0.4893573522567749,
      "learning_rate": 5.902595726252801e-06,
      "loss": 0.341,
      "step": 2490
    },
    {
      "epoch": 0.107,
      "grad_norm": 0.4728393852710724,
      "learning_rate": 5.8991621944557224e-06,
      "loss": 0.3559,
      "step": 2491
    },
    {
      "epoch": 0.1072,
      "grad_norm": 0.4356817901134491,
      "learning_rate": 5.8957282244179125e-06,
      "loss": 0.3449,
      "step": 2492
    },
    {
      "epoch": 0.1074,
      "grad_norm": 0.5693634152412415,
      "learning_rate": 5.892293817813048e-06,
      "loss": 0.3904,
      "step": 2493
    },
    {
      "epoch": 0.1076,
      "grad_norm": 0.5422914624214172,
      "learning_rate": 5.8888589763150165e-06,
      "loss": 0.364,
      "step": 2494
    },
    {
      "epoch": 0.1078,
      "grad_norm": 0.5389630794525146,
      "learning_rate": 5.885423701597918e-06,
      "loss": 0.3403,
      "step": 2495
    },
    {
      "epoch": 0.108,
      "grad_norm": 0.6175971627235413,
      "learning_rate": 5.881987995336062e-06,
      "loss": 0.3263,
      "step": 2496
    },
    {
      "epoch": 0.1082,
      "grad_norm": 0.5937650203704834,
      "learning_rate": 5.878551859203974e-06,
      "loss": 0.3644,
      "step": 2497
    },
    {
      "epoch": 0.1084,
      "grad_norm": 0.5989448428153992,
      "learning_rate": 5.8751152948763815e-06,
      "loss": 0.3714,
      "step": 2498
    },
    {
      "epoch": 0.1086,
      "grad_norm": 0.6891950368881226,
      "learning_rate": 5.871678304028224e-06,
      "loss": 0.325,
      "step": 2499
    },
    {
      "epoch": 0.1088,
      "grad_norm": 0.7100841403007507,
      "learning_rate": 5.8682408883346535e-06,
      "loss": 0.3304,
      "step": 2500
    },
    {
      "epoch": 0.109,
      "grad_norm": 0.4514957666397095,
      "learning_rate": 5.8648030494710195e-06,
      "loss": 0.3286,
      "step": 2501
    },
    {
      "epoch": 0.1092,
      "grad_norm": 0.5337551236152649,
      "learning_rate": 5.8613647891128845e-06,
      "loss": 0.3542,
      "step": 2502
    },
    {
      "epoch": 0.1094,
      "grad_norm": 0.46276718378067017,
      "learning_rate": 5.857926108936015e-06,
      "loss": 0.3508,
      "step": 2503
    },
    {
      "epoch": 0.1096,
      "grad_norm": 0.4329129159450531,
      "learning_rate": 5.854487010616384e-06,
      "loss": 0.3393,
      "step": 2504
    },
    {
      "epoch": 0.1098,
      "grad_norm": 0.5454452633857727,
      "learning_rate": 5.851047495830163e-06,
      "loss": 0.3598,
      "step": 2505
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.575804591178894,
      "learning_rate": 5.847607566253732e-06,
      "loss": 0.3575,
      "step": 2506
    },
    {
      "epoch": 0.1102,
      "grad_norm": 0.4394480586051941,
      "learning_rate": 5.844167223563669e-06,
      "loss": 0.2993,
      "step": 2507
    },
    {
      "epoch": 0.1104,
      "grad_norm": 0.40095749497413635,
      "learning_rate": 5.840726469436758e-06,
      "loss": 0.3567,
      "step": 2508
    },
    {
      "epoch": 0.1106,
      "grad_norm": 0.4133773446083069,
      "learning_rate": 5.837285305549978e-06,
      "loss": 0.327,
      "step": 2509
    },
    {
      "epoch": 0.1108,
      "grad_norm": 0.3487693667411804,
      "learning_rate": 5.8338437335805124e-06,
      "loss": 0.3248,
      "step": 2510
    },
    {
      "epoch": 0.111,
      "grad_norm": 0.4703841209411621,
      "learning_rate": 5.83040175520574e-06,
      "loss": 0.3109,
      "step": 2511
    },
    {
      "epoch": 0.1112,
      "grad_norm": 0.4641053378582001,
      "learning_rate": 5.826959372103239e-06,
      "loss": 0.3266,
      "step": 2512
    },
    {
      "epoch": 0.1114,
      "grad_norm": 0.3931761384010315,
      "learning_rate": 5.823516585950787e-06,
      "loss": 0.2878,
      "step": 2513
    },
    {
      "epoch": 0.1116,
      "grad_norm": 0.4006181061267853,
      "learning_rate": 5.8200733984263556e-06,
      "loss": 0.3571,
      "step": 2514
    },
    {
      "epoch": 0.1118,
      "grad_norm": 0.5352520942687988,
      "learning_rate": 5.816629811208112e-06,
      "loss": 0.3552,
      "step": 2515
    },
    {
      "epoch": 0.112,
      "grad_norm": 0.4576277434825897,
      "learning_rate": 5.813185825974419e-06,
      "loss": 0.3439,
      "step": 2516
    },
    {
      "epoch": 0.1122,
      "grad_norm": 0.5537109971046448,
      "learning_rate": 5.809741444403831e-06,
      "loss": 0.3198,
      "step": 2517
    },
    {
      "epoch": 0.1124,
      "grad_norm": 0.4727891683578491,
      "learning_rate": 5.8062966681751046e-06,
      "loss": 0.3361,
      "step": 2518
    },
    {
      "epoch": 0.1126,
      "grad_norm": 1.659069538116455,
      "learning_rate": 5.802851498967173e-06,
      "loss": 0.3429,
      "step": 2519
    },
    {
      "epoch": 0.1128,
      "grad_norm": 0.6577011942863464,
      "learning_rate": 5.799405938459175e-06,
      "loss": 0.3779,
      "step": 2520
    },
    {
      "epoch": 0.113,
      "grad_norm": 0.6619930267333984,
      "learning_rate": 5.795959988330434e-06,
      "loss": 0.3393,
      "step": 2521
    },
    {
      "epoch": 0.1132,
      "grad_norm": 0.43529364466667175,
      "learning_rate": 5.792513650260465e-06,
      "loss": 0.3447,
      "step": 2522
    },
    {
      "epoch": 0.1134,
      "grad_norm": 0.509809672832489,
      "learning_rate": 5.78906692592897e-06,
      "loss": 0.3303,
      "step": 2523
    },
    {
      "epoch": 0.1136,
      "grad_norm": 0.445214182138443,
      "learning_rate": 5.78561981701584e-06,
      "loss": 0.3223,
      "step": 2524
    },
    {
      "epoch": 0.1138,
      "grad_norm": 0.5089664459228516,
      "learning_rate": 5.782172325201155e-06,
      "loss": 0.3361,
      "step": 2525
    },
    {
      "epoch": 0.114,
      "grad_norm": 0.4775829613208771,
      "learning_rate": 5.778724452165181e-06,
      "loss": 0.3656,
      "step": 2526
    },
    {
      "epoch": 0.1142,
      "grad_norm": 0.6923373341560364,
      "learning_rate": 5.77527619958837e-06,
      "loss": 0.3623,
      "step": 2527
    },
    {
      "epoch": 0.1144,
      "grad_norm": 0.6682674884796143,
      "learning_rate": 5.771827569151357e-06,
      "loss": 0.3328,
      "step": 2528
    },
    {
      "epoch": 0.1146,
      "grad_norm": 0.528679370880127,
      "learning_rate": 5.768378562534962e-06,
      "loss": 0.3671,
      "step": 2529
    },
    {
      "epoch": 0.1148,
      "grad_norm": 0.5196332931518555,
      "learning_rate": 5.764929181420191e-06,
      "loss": 0.3604,
      "step": 2530
    },
    {
      "epoch": 0.115,
      "grad_norm": 0.4479385018348694,
      "learning_rate": 5.761479427488229e-06,
      "loss": 0.3609,
      "step": 2531
    },
    {
      "epoch": 0.1152,
      "grad_norm": 0.6219203472137451,
      "learning_rate": 5.7580293024204455e-06,
      "loss": 0.3384,
      "step": 2532
    },
    {
      "epoch": 0.1154,
      "grad_norm": 0.44779330492019653,
      "learning_rate": 5.7545788078983875e-06,
      "loss": 0.3351,
      "step": 2533
    },
    {
      "epoch": 0.1156,
      "grad_norm": 0.5837389230728149,
      "learning_rate": 5.751127945603786e-06,
      "loss": 0.3062,
      "step": 2534
    },
    {
      "epoch": 0.1158,
      "grad_norm": 0.5400658845901489,
      "learning_rate": 5.747676717218549e-06,
      "loss": 0.3357,
      "step": 2535
    },
    {
      "epoch": 0.116,
      "grad_norm": 0.44721272587776184,
      "learning_rate": 5.744225124424762e-06,
      "loss": 0.3451,
      "step": 2536
    },
    {
      "epoch": 0.1162,
      "grad_norm": 0.4272143840789795,
      "learning_rate": 5.740773168904691e-06,
      "loss": 0.3534,
      "step": 2537
    },
    {
      "epoch": 0.1164,
      "grad_norm": 0.5008043050765991,
      "learning_rate": 5.737320852340776e-06,
      "loss": 0.358,
      "step": 2538
    },
    {
      "epoch": 0.1166,
      "grad_norm": 0.3751415014266968,
      "learning_rate": 5.733868176415633e-06,
      "loss": 0.308,
      "step": 2539
    },
    {
      "epoch": 0.1168,
      "grad_norm": 0.4946294128894806,
      "learning_rate": 5.730415142812059e-06,
      "loss": 0.3273,
      "step": 2540
    },
    {
      "epoch": 0.117,
      "grad_norm": 0.4768286943435669,
      "learning_rate": 5.726961753213016e-06,
      "loss": 0.3189,
      "step": 2541
    },
    {
      "epoch": 0.1172,
      "grad_norm": 0.8663665056228638,
      "learning_rate": 5.723508009301646e-06,
      "loss": 0.363,
      "step": 2542
    },
    {
      "epoch": 0.1174,
      "grad_norm": 1.4301327466964722,
      "learning_rate": 5.720053912761261e-06,
      "loss": 0.3272,
      "step": 2543
    },
    {
      "epoch": 0.1176,
      "grad_norm": 0.8081874251365662,
      "learning_rate": 5.716599465275347e-06,
      "loss": 0.3281,
      "step": 2544
    },
    {
      "epoch": 0.1178,
      "grad_norm": 0.4198618233203888,
      "learning_rate": 5.7131446685275595e-06,
      "loss": 0.3115,
      "step": 2545
    },
    {
      "epoch": 0.118,
      "grad_norm": 0.4546365439891815,
      "learning_rate": 5.709689524201723e-06,
      "loss": 0.3342,
      "step": 2546
    },
    {
      "epoch": 0.1182,
      "grad_norm": 0.4898332953453064,
      "learning_rate": 5.706234033981835e-06,
      "loss": 0.3185,
      "step": 2547
    },
    {
      "epoch": 0.1184,
      "grad_norm": 0.50981605052948,
      "learning_rate": 5.702778199552055e-06,
      "loss": 0.3374,
      "step": 2548
    },
    {
      "epoch": 0.1186,
      "grad_norm": 0.48069900274276733,
      "learning_rate": 5.6993220225967214e-06,
      "loss": 0.3425,
      "step": 2549
    },
    {
      "epoch": 0.1188,
      "grad_norm": 0.4271857738494873,
      "learning_rate": 5.695865504800328e-06,
      "loss": 0.3321,
      "step": 2550
    },
    {
      "epoch": 0.119,
      "grad_norm": 0.6733425855636597,
      "learning_rate": 5.692408647847542e-06,
      "loss": 0.3406,
      "step": 2551
    },
    {
      "epoch": 0.1192,
      "grad_norm": 0.4447859227657318,
      "learning_rate": 5.68895145342319e-06,
      "loss": 0.3262,
      "step": 2552
    },
    {
      "epoch": 0.1194,
      "grad_norm": 0.6120507717132568,
      "learning_rate": 5.685493923212273e-06,
      "loss": 0.3299,
      "step": 2553
    },
    {
      "epoch": 0.1196,
      "grad_norm": 0.5846794843673706,
      "learning_rate": 5.682036058899942e-06,
      "loss": 0.3391,
      "step": 2554
    },
    {
      "epoch": 0.1198,
      "grad_norm": 0.7311544418334961,
      "learning_rate": 5.678577862171523e-06,
      "loss": 0.3086,
      "step": 2555
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.4345288574695587,
      "learning_rate": 5.675119334712496e-06,
      "loss": 0.3272,
      "step": 2556
    },
    {
      "epoch": 0.1202,
      "grad_norm": 0.42603060603141785,
      "learning_rate": 5.671660478208508e-06,
      "loss": 0.3378,
      "step": 2557
    },
    {
      "epoch": 0.1204,
      "grad_norm": 0.473880410194397,
      "learning_rate": 5.668201294345363e-06,
      "loss": 0.3686,
      "step": 2558
    },
    {
      "epoch": 0.1206,
      "grad_norm": 0.5345448851585388,
      "learning_rate": 5.6647417848090225e-06,
      "loss": 0.3502,
      "step": 2559
    },
    {
      "epoch": 0.1208,
      "grad_norm": 0.48460853099823,
      "learning_rate": 5.661281951285613e-06,
      "loss": 0.3817,
      "step": 2560
    },
    {
      "epoch": 0.121,
      "grad_norm": 0.5077294111251831,
      "learning_rate": 5.657821795461413e-06,
      "loss": 0.3324,
      "step": 2561
    },
    {
      "epoch": 0.1212,
      "grad_norm": 0.49676749110221863,
      "learning_rate": 5.654361319022862e-06,
      "loss": 0.3499,
      "step": 2562
    },
    {
      "epoch": 0.1214,
      "grad_norm": 0.5054965019226074,
      "learning_rate": 5.650900523656553e-06,
      "loss": 0.4072,
      "step": 2563
    },
    {
      "epoch": 0.1216,
      "grad_norm": 0.5544432401657104,
      "learning_rate": 5.647439411049235e-06,
      "loss": 0.357,
      "step": 2564
    },
    {
      "epoch": 0.1218,
      "grad_norm": 0.5697183609008789,
      "learning_rate": 5.643977982887815e-06,
      "loss": 0.3257,
      "step": 2565
    },
    {
      "epoch": 0.122,
      "grad_norm": 0.42228376865386963,
      "learning_rate": 5.640516240859348e-06,
      "loss": 0.2956,
      "step": 2566
    },
    {
      "epoch": 0.1222,
      "grad_norm": 0.4598306715488434,
      "learning_rate": 5.6370541866510476e-06,
      "loss": 0.382,
      "step": 2567
    },
    {
      "epoch": 0.1224,
      "grad_norm": 0.5059530138969421,
      "learning_rate": 5.633591821950274e-06,
      "loss": 0.3596,
      "step": 2568
    },
    {
      "epoch": 0.1226,
      "grad_norm": 0.4355121850967407,
      "learning_rate": 5.630129148444543e-06,
      "loss": 0.3252,
      "step": 2569
    },
    {
      "epoch": 0.1228,
      "grad_norm": 0.3972122073173523,
      "learning_rate": 5.626666167821522e-06,
      "loss": 0.3192,
      "step": 2570
    },
    {
      "epoch": 0.123,
      "grad_norm": 0.4849396049976349,
      "learning_rate": 5.623202881769023e-06,
      "loss": 0.3366,
      "step": 2571
    },
    {
      "epoch": 0.1232,
      "grad_norm": 0.6177840232849121,
      "learning_rate": 5.6197392919750095e-06,
      "loss": 0.3377,
      "step": 2572
    },
    {
      "epoch": 0.1234,
      "grad_norm": 0.4498003423213959,
      "learning_rate": 5.616275400127594e-06,
      "loss": 0.3302,
      "step": 2573
    },
    {
      "epoch": 0.1236,
      "grad_norm": 0.4546942412853241,
      "learning_rate": 5.612811207915034e-06,
      "loss": 0.3456,
      "step": 2574
    },
    {
      "epoch": 0.1238,
      "grad_norm": 0.5279334783554077,
      "learning_rate": 5.609346717025738e-06,
      "loss": 0.3236,
      "step": 2575
    },
    {
      "epoch": 0.124,
      "grad_norm": 0.46980607509613037,
      "learning_rate": 5.605881929148254e-06,
      "loss": 0.3409,
      "step": 2576
    },
    {
      "epoch": 0.1242,
      "grad_norm": 0.4053809642791748,
      "learning_rate": 5.6024168459712765e-06,
      "loss": 0.2996,
      "step": 2577
    },
    {
      "epoch": 0.1244,
      "grad_norm": 0.4337459206581116,
      "learning_rate": 5.598951469183649e-06,
      "loss": 0.3286,
      "step": 2578
    },
    {
      "epoch": 0.1246,
      "grad_norm": 0.41953104734420776,
      "learning_rate": 5.59548580047435e-06,
      "loss": 0.3269,
      "step": 2579
    },
    {
      "epoch": 0.1248,
      "grad_norm": 0.6872628331184387,
      "learning_rate": 5.592019841532507e-06,
      "loss": 0.3373,
      "step": 2580
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.46863850951194763,
      "learning_rate": 5.588553594047382e-06,
      "loss": 0.3331,
      "step": 2581
    },
    {
      "epoch": 0.1252,
      "grad_norm": 0.433699369430542,
      "learning_rate": 5.585087059708389e-06,
      "loss": 0.4037,
      "step": 2582
    },
    {
      "epoch": 0.1254,
      "grad_norm": 1.0763322114944458,
      "learning_rate": 5.581620240205068e-06,
      "loss": 0.3336,
      "step": 2583
    },
    {
      "epoch": 0.1256,
      "grad_norm": 0.46747028827667236,
      "learning_rate": 5.578153137227109e-06,
      "loss": 0.3829,
      "step": 2584
    },
    {
      "epoch": 0.1258,
      "grad_norm": 0.43730711936950684,
      "learning_rate": 5.5746857524643335e-06,
      "loss": 0.3238,
      "step": 2585
    },
    {
      "epoch": 0.126,
      "grad_norm": 0.45250073075294495,
      "learning_rate": 5.5712180876067045e-06,
      "loss": 0.3105,
      "step": 2586
    },
    {
      "epoch": 0.1262,
      "grad_norm": 0.47033315896987915,
      "learning_rate": 5.567750144344318e-06,
      "loss": 0.3393,
      "step": 2587
    },
    {
      "epoch": 0.1264,
      "grad_norm": 0.45609211921691895,
      "learning_rate": 5.5642819243674085e-06,
      "loss": 0.34,
      "step": 2588
    },
    {
      "epoch": 0.1266,
      "grad_norm": 0.3845784366130829,
      "learning_rate": 5.560813429366345e-06,
      "loss": 0.3282,
      "step": 2589
    },
    {
      "epoch": 0.1268,
      "grad_norm": 0.38186946511268616,
      "learning_rate": 5.557344661031628e-06,
      "loss": 0.3354,
      "step": 2590
    },
    {
      "epoch": 0.127,
      "grad_norm": 0.5236292481422424,
      "learning_rate": 5.553875621053893e-06,
      "loss": 0.3416,
      "step": 2591
    },
    {
      "epoch": 0.1272,
      "grad_norm": 0.3920409381389618,
      "learning_rate": 5.5504063111239116e-06,
      "loss": 0.3271,
      "step": 2592
    },
    {
      "epoch": 0.1274,
      "grad_norm": 0.4052762985229492,
      "learning_rate": 5.546936732932578e-06,
      "loss": 0.3248,
      "step": 2593
    },
    {
      "epoch": 0.1276,
      "grad_norm": 0.7514827847480774,
      "learning_rate": 5.543466888170927e-06,
      "loss": 0.3472,
      "step": 2594
    },
    {
      "epoch": 0.1278,
      "grad_norm": 0.5934627652168274,
      "learning_rate": 5.539996778530114e-06,
      "loss": 0.3438,
      "step": 2595
    },
    {
      "epoch": 0.128,
      "grad_norm": 0.6094369888305664,
      "learning_rate": 5.536526405701433e-06,
      "loss": 0.3181,
      "step": 2596
    },
    {
      "epoch": 0.1282,
      "grad_norm": 0.5669367909431458,
      "learning_rate": 5.5330557713763e-06,
      "loss": 0.3534,
      "step": 2597
    },
    {
      "epoch": 0.1284,
      "grad_norm": 0.4969855546951294,
      "learning_rate": 5.52958487724626e-06,
      "loss": 0.3374,
      "step": 2598
    },
    {
      "epoch": 0.1286,
      "grad_norm": 0.6923285722732544,
      "learning_rate": 5.526113725002984e-06,
      "loss": 0.347,
      "step": 2599
    },
    {
      "epoch": 0.1288,
      "grad_norm": 0.4820356070995331,
      "learning_rate": 5.522642316338268e-06,
      "loss": 0.3643,
      "step": 2600
    },
    {
      "epoch": 0.129,
      "grad_norm": 0.5885396003723145,
      "learning_rate": 5.519170652944037e-06,
      "loss": 0.3323,
      "step": 2601
    },
    {
      "epoch": 0.1292,
      "grad_norm": 0.4667416214942932,
      "learning_rate": 5.515698736512337e-06,
      "loss": 0.3411,
      "step": 2602
    },
    {
      "epoch": 0.1294,
      "grad_norm": 0.4472252428531647,
      "learning_rate": 5.512226568735338e-06,
      "loss": 0.3149,
      "step": 2603
    },
    {
      "epoch": 0.1296,
      "grad_norm": 0.603427529335022,
      "learning_rate": 5.508754151305332e-06,
      "loss": 0.3412,
      "step": 2604
    },
    {
      "epoch": 0.1298,
      "grad_norm": 1.040480375289917,
      "learning_rate": 5.505281485914732e-06,
      "loss": 0.3322,
      "step": 2605
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.48509418964385986,
      "learning_rate": 5.5018085742560745e-06,
      "loss": 0.3237,
      "step": 2606
    },
    {
      "epoch": 0.1302,
      "grad_norm": 0.45700499415397644,
      "learning_rate": 5.498335418022015e-06,
      "loss": 0.359,
      "step": 2607
    },
    {
      "epoch": 0.1304,
      "grad_norm": 0.48253506422042847,
      "learning_rate": 5.4948620189053255e-06,
      "loss": 0.3794,
      "step": 2608
    },
    {
      "epoch": 0.1306,
      "grad_norm": 0.5151655077934265,
      "learning_rate": 5.491388378598899e-06,
      "loss": 0.3478,
      "step": 2609
    },
    {
      "epoch": 0.1308,
      "grad_norm": 0.5378462076187134,
      "learning_rate": 5.487914498795748e-06,
      "loss": 0.3071,
      "step": 2610
    },
    {
      "epoch": 0.131,
      "grad_norm": 0.4408649504184723,
      "learning_rate": 5.484440381188997e-06,
      "loss": 0.3469,
      "step": 2611
    },
    {
      "epoch": 0.1312,
      "grad_norm": 0.3426961898803711,
      "learning_rate": 5.480966027471889e-06,
      "loss": 0.2789,
      "step": 2612
    },
    {
      "epoch": 0.1314,
      "grad_norm": 0.6163522005081177,
      "learning_rate": 5.477491439337782e-06,
      "loss": 0.3627,
      "step": 2613
    },
    {
      "epoch": 0.1316,
      "grad_norm": 0.5602014064788818,
      "learning_rate": 5.474016618480147e-06,
      "loss": 0.3637,
      "step": 2614
    },
    {
      "epoch": 0.1318,
      "grad_norm": 0.431556761264801,
      "learning_rate": 5.470541566592573e-06,
      "loss": 0.3774,
      "step": 2615
    },
    {
      "epoch": 0.132,
      "grad_norm": 0.49795183539390564,
      "learning_rate": 5.467066285368754e-06,
      "loss": 0.3817,
      "step": 2616
    },
    {
      "epoch": 0.1322,
      "grad_norm": 0.5454168319702148,
      "learning_rate": 5.463590776502501e-06,
      "loss": 0.3165,
      "step": 2617
    },
    {
      "epoch": 0.1324,
      "grad_norm": 0.4711078405380249,
      "learning_rate": 5.460115041687737e-06,
      "loss": 0.331,
      "step": 2618
    },
    {
      "epoch": 0.1326,
      "grad_norm": 0.5273987650871277,
      "learning_rate": 5.456639082618489e-06,
      "loss": 0.352,
      "step": 2619
    },
    {
      "epoch": 0.1328,
      "grad_norm": 0.4167407155036926,
      "learning_rate": 5.453162900988902e-06,
      "loss": 0.3629,
      "step": 2620
    },
    {
      "epoch": 0.133,
      "grad_norm": 0.47276294231414795,
      "learning_rate": 5.449686498493219e-06,
      "loss": 0.336,
      "step": 2621
    },
    {
      "epoch": 0.1332,
      "grad_norm": 0.48313355445861816,
      "learning_rate": 5.446209876825803e-06,
      "loss": 0.3689,
      "step": 2622
    },
    {
      "epoch": 0.1334,
      "grad_norm": 0.4602978825569153,
      "learning_rate": 5.442733037681112e-06,
      "loss": 0.3422,
      "step": 2623
    },
    {
      "epoch": 0.1336,
      "grad_norm": 0.44639089703559875,
      "learning_rate": 5.439255982753717e-06,
      "loss": 0.3592,
      "step": 2624
    },
    {
      "epoch": 0.1338,
      "grad_norm": 0.5094977021217346,
      "learning_rate": 5.435778713738292e-06,
      "loss": 0.3503,
      "step": 2625
    },
    {
      "epoch": 0.134,
      "grad_norm": 0.4401250183582306,
      "learning_rate": 5.432301232329615e-06,
      "loss": 0.3399,
      "step": 2626
    },
    {
      "epoch": 0.1342,
      "grad_norm": 0.4348141849040985,
      "learning_rate": 5.42882354022257e-06,
      "loss": 0.3049,
      "step": 2627
    },
    {
      "epoch": 0.1344,
      "grad_norm": 0.41097167134284973,
      "learning_rate": 5.425345639112141e-06,
      "loss": 0.3328,
      "step": 2628
    },
    {
      "epoch": 0.1346,
      "grad_norm": 1.2424383163452148,
      "learning_rate": 5.4218675306934145e-06,
      "loss": 0.3462,
      "step": 2629
    },
    {
      "epoch": 0.1348,
      "grad_norm": 0.5841127038002014,
      "learning_rate": 5.41838921666158e-06,
      "loss": 0.3332,
      "step": 2630
    },
    {
      "epoch": 0.135,
      "grad_norm": 0.5280335545539856,
      "learning_rate": 5.41491069871192e-06,
      "loss": 0.3298,
      "step": 2631
    },
    {
      "epoch": 0.1352,
      "grad_norm": 0.40219050645828247,
      "learning_rate": 5.411431978539829e-06,
      "loss": 0.3192,
      "step": 2632
    },
    {
      "epoch": 0.1354,
      "grad_norm": 0.42326822876930237,
      "learning_rate": 5.4079530578407895e-06,
      "loss": 0.3101,
      "step": 2633
    },
    {
      "epoch": 0.1356,
      "grad_norm": 0.4421539008617401,
      "learning_rate": 5.404473938310384e-06,
      "loss": 0.3369,
      "step": 2634
    },
    {
      "epoch": 0.1358,
      "grad_norm": 0.5608934164047241,
      "learning_rate": 5.400994621644294e-06,
      "loss": 0.2989,
      "step": 2635
    },
    {
      "epoch": 0.136,
      "grad_norm": 0.3942048251628876,
      "learning_rate": 5.3975151095383e-06,
      "loss": 0.319,
      "step": 2636
    },
    {
      "epoch": 0.1362,
      "grad_norm": 0.4985632002353668,
      "learning_rate": 5.394035403688268e-06,
      "loss": 0.3613,
      "step": 2637
    },
    {
      "epoch": 0.1364,
      "grad_norm": 0.5739010572433472,
      "learning_rate": 5.390555505790168e-06,
      "loss": 0.3539,
      "step": 2638
    },
    {
      "epoch": 0.1366,
      "grad_norm": 0.6161050796508789,
      "learning_rate": 5.3870754175400595e-06,
      "loss": 0.3606,
      "step": 2639
    },
    {
      "epoch": 0.1368,
      "grad_norm": 0.4394353926181793,
      "learning_rate": 5.383595140634093e-06,
      "loss": 0.3539,
      "step": 2640
    },
    {
      "epoch": 0.137,
      "grad_norm": 1.4183363914489746,
      "learning_rate": 5.380114676768516e-06,
      "loss": 0.3193,
      "step": 2641
    },
    {
      "epoch": 0.1372,
      "grad_norm": 0.45763301849365234,
      "learning_rate": 5.376634027639664e-06,
      "loss": 0.3318,
      "step": 2642
    },
    {
      "epoch": 0.1374,
      "grad_norm": 0.47081637382507324,
      "learning_rate": 5.373153194943962e-06,
      "loss": 0.368,
      "step": 2643
    },
    {
      "epoch": 0.1376,
      "grad_norm": 0.5137501955032349,
      "learning_rate": 5.3696721803779265e-06,
      "loss": 0.3165,
      "step": 2644
    },
    {
      "epoch": 0.1378,
      "grad_norm": 0.3951941728591919,
      "learning_rate": 5.366190985638159e-06,
      "loss": 0.2993,
      "step": 2645
    },
    {
      "epoch": 0.138,
      "grad_norm": 0.5165127515792847,
      "learning_rate": 5.362709612421355e-06,
      "loss": 0.3046,
      "step": 2646
    },
    {
      "epoch": 0.1382,
      "grad_norm": 0.45185208320617676,
      "learning_rate": 5.359228062424292e-06,
      "loss": 0.3277,
      "step": 2647
    },
    {
      "epoch": 0.1384,
      "grad_norm": 0.6852658987045288,
      "learning_rate": 5.355746337343835e-06,
      "loss": 0.327,
      "step": 2648
    },
    {
      "epoch": 0.1386,
      "grad_norm": 0.44578272104263306,
      "learning_rate": 5.352264438876935e-06,
      "loss": 0.3606,
      "step": 2649
    },
    {
      "epoch": 0.1388,
      "grad_norm": 0.4570469856262207,
      "learning_rate": 5.348782368720627e-06,
      "loss": 0.3404,
      "step": 2650
    },
    {
      "epoch": 0.139,
      "grad_norm": 0.6016443967819214,
      "learning_rate": 5.345300128572031e-06,
      "loss": 0.3064,
      "step": 2651
    },
    {
      "epoch": 0.1392,
      "grad_norm": 0.5311416387557983,
      "learning_rate": 5.341817720128344e-06,
      "loss": 0.3717,
      "step": 2652
    },
    {
      "epoch": 0.1394,
      "grad_norm": 0.4654076099395752,
      "learning_rate": 5.338335145086855e-06,
      "loss": 0.3278,
      "step": 2653
    },
    {
      "epoch": 0.1396,
      "grad_norm": 0.4560227692127228,
      "learning_rate": 5.334852405144926e-06,
      "loss": 0.3293,
      "step": 2654
    },
    {
      "epoch": 0.1398,
      "grad_norm": 0.5013658404350281,
      "learning_rate": 5.3313695020000026e-06,
      "loss": 0.3961,
      "step": 2655
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5590485334396362,
      "learning_rate": 5.327886437349609e-06,
      "loss": 0.3484,
      "step": 2656
    },
    {
      "epoch": 0.1402,
      "grad_norm": 0.39753633737564087,
      "learning_rate": 5.3244032128913485e-06,
      "loss": 0.3203,
      "step": 2657
    },
    {
      "epoch": 0.1404,
      "grad_norm": 0.5390545129776001,
      "learning_rate": 5.320919830322903e-06,
      "loss": 0.3273,
      "step": 2658
    },
    {
      "epoch": 0.1406,
      "grad_norm": 0.44448986649513245,
      "learning_rate": 5.317436291342031e-06,
      "loss": 0.3665,
      "step": 2659
    },
    {
      "epoch": 0.1408,
      "grad_norm": 0.5239413380622864,
      "learning_rate": 5.3139525976465675e-06,
      "loss": 0.3586,
      "step": 2660
    },
    {
      "epoch": 0.141,
      "grad_norm": 0.5007572770118713,
      "learning_rate": 5.310468750934421e-06,
      "loss": 0.3411,
      "step": 2661
    },
    {
      "epoch": 0.1412,
      "grad_norm": 0.4657650589942932,
      "learning_rate": 5.306984752903578e-06,
      "loss": 0.3487,
      "step": 2662
    },
    {
      "epoch": 0.1414,
      "grad_norm": 0.8615016341209412,
      "learning_rate": 5.303500605252095e-06,
      "loss": 0.3262,
      "step": 2663
    },
    {
      "epoch": 0.1416,
      "grad_norm": 0.4359201192855835,
      "learning_rate": 5.300016309678104e-06,
      "loss": 0.3165,
      "step": 2664
    },
    {
      "epoch": 0.1418,
      "grad_norm": 0.38435831665992737,
      "learning_rate": 5.296531867879809e-06,
      "loss": 0.3204,
      "step": 2665
    },
    {
      "epoch": 0.142,
      "grad_norm": 0.5041138529777527,
      "learning_rate": 5.293047281555482e-06,
      "loss": 0.3392,
      "step": 2666
    },
    {
      "epoch": 0.1422,
      "grad_norm": 0.4539742171764374,
      "learning_rate": 5.289562552403472e-06,
      "loss": 0.3551,
      "step": 2667
    },
    {
      "epoch": 0.1424,
      "grad_norm": 0.5192986726760864,
      "learning_rate": 5.2860776821221915e-06,
      "loss": 0.3075,
      "step": 2668
    },
    {
      "epoch": 0.1426,
      "grad_norm": 0.5132282376289368,
      "learning_rate": 5.282592672410124e-06,
      "loss": 0.3156,
      "step": 2669
    },
    {
      "epoch": 0.1428,
      "grad_norm": 0.42370420694351196,
      "learning_rate": 5.27910752496582e-06,
      "loss": 0.3331,
      "step": 2670
    },
    {
      "epoch": 0.143,
      "grad_norm": 0.48373469710350037,
      "learning_rate": 5.275622241487899e-06,
      "loss": 0.3388,
      "step": 2671
    },
    {
      "epoch": 0.1432,
      "grad_norm": 0.4567534327507019,
      "learning_rate": 5.272136823675046e-06,
      "loss": 0.3308,
      "step": 2672
    },
    {
      "epoch": 0.1434,
      "grad_norm": 0.4519708752632141,
      "learning_rate": 5.268651273226011e-06,
      "loss": 0.3504,
      "step": 2673
    },
    {
      "epoch": 0.1436,
      "grad_norm": 0.4126659035682678,
      "learning_rate": 5.26516559183961e-06,
      "loss": 0.3393,
      "step": 2674
    },
    {
      "epoch": 0.1438,
      "grad_norm": 0.5269172191619873,
      "learning_rate": 5.2616797812147205e-06,
      "loss": 0.3686,
      "step": 2675
    },
    {
      "epoch": 0.144,
      "grad_norm": 0.4245145320892334,
      "learning_rate": 5.258193843050283e-06,
      "loss": 0.3117,
      "step": 2676
    },
    {
      "epoch": 0.1442,
      "grad_norm": 0.49999627470970154,
      "learning_rate": 5.254707779045305e-06,
      "loss": 0.3136,
      "step": 2677
    },
    {
      "epoch": 0.1444,
      "grad_norm": 0.4373839795589447,
      "learning_rate": 5.251221590898848e-06,
      "loss": 0.3443,
      "step": 2678
    },
    {
      "epoch": 0.1446,
      "grad_norm": 0.5152217149734497,
      "learning_rate": 5.247735280310041e-06,
      "loss": 0.3449,
      "step": 2679
    },
    {
      "epoch": 0.1448,
      "grad_norm": 0.42065057158470154,
      "learning_rate": 5.244248848978067e-06,
      "loss": 0.3345,
      "step": 2680
    },
    {
      "epoch": 0.145,
      "grad_norm": 0.4965130388736725,
      "learning_rate": 5.240762298602171e-06,
      "loss": 0.3703,
      "step": 2681
    },
    {
      "epoch": 0.1452,
      "grad_norm": 0.4254762828350067,
      "learning_rate": 5.237275630881658e-06,
      "loss": 0.3338,
      "step": 2682
    },
    {
      "epoch": 0.1454,
      "grad_norm": 0.6184040307998657,
      "learning_rate": 5.233788847515882e-06,
      "loss": 0.3149,
      "step": 2683
    },
    {
      "epoch": 0.1456,
      "grad_norm": 0.6969699859619141,
      "learning_rate": 5.230301950204261e-06,
      "loss": 0.3533,
      "step": 2684
    },
    {
      "epoch": 0.1458,
      "grad_norm": 0.403861939907074,
      "learning_rate": 5.226814940646268e-06,
      "loss": 0.307,
      "step": 2685
    },
    {
      "epoch": 0.146,
      "grad_norm": 0.5229875445365906,
      "learning_rate": 5.223327820541432e-06,
      "loss": 0.3155,
      "step": 2686
    },
    {
      "epoch": 0.1462,
      "grad_norm": 0.5536125898361206,
      "learning_rate": 5.219840591589325e-06,
      "loss": 0.3353,
      "step": 2687
    },
    {
      "epoch": 0.1464,
      "grad_norm": 0.5897732973098755,
      "learning_rate": 5.216353255489586e-06,
      "loss": 0.3706,
      "step": 2688
    },
    {
      "epoch": 0.1466,
      "grad_norm": 0.6129360795021057,
      "learning_rate": 5.212865813941899e-06,
      "loss": 0.3346,
      "step": 2689
    },
    {
      "epoch": 0.1468,
      "grad_norm": 0.47016003727912903,
      "learning_rate": 5.209378268645998e-06,
      "loss": 0.3144,
      "step": 2690
    },
    {
      "epoch": 0.147,
      "grad_norm": 0.5639607310295105,
      "learning_rate": 5.205890621301676e-06,
      "loss": 0.3479,
      "step": 2691
    },
    {
      "epoch": 0.1472,
      "grad_norm": 0.6042150259017944,
      "learning_rate": 5.202402873608763e-06,
      "loss": 0.3575,
      "step": 2692
    },
    {
      "epoch": 0.1474,
      "grad_norm": 0.49998340010643005,
      "learning_rate": 5.19891502726715e-06,
      "loss": 0.3495,
      "step": 2693
    },
    {
      "epoch": 0.1476,
      "grad_norm": 0.43834683299064636,
      "learning_rate": 5.195427083976768e-06,
      "loss": 0.3442,
      "step": 2694
    },
    {
      "epoch": 0.1478,
      "grad_norm": 0.5355640053749084,
      "learning_rate": 5.1919390454376e-06,
      "loss": 0.361,
      "step": 2695
    },
    {
      "epoch": 0.148,
      "grad_norm": 0.49302321672439575,
      "learning_rate": 5.188450913349674e-06,
      "loss": 0.3269,
      "step": 2696
    },
    {
      "epoch": 0.1482,
      "grad_norm": 0.46516844630241394,
      "learning_rate": 5.18496268941306e-06,
      "loss": 0.3666,
      "step": 2697
    },
    {
      "epoch": 0.1484,
      "grad_norm": 0.5970322489738464,
      "learning_rate": 5.18147437532788e-06,
      "loss": 0.3525,
      "step": 2698
    },
    {
      "epoch": 0.1486,
      "grad_norm": 0.582413911819458,
      "learning_rate": 5.177985972794293e-06,
      "loss": 0.349,
      "step": 2699
    },
    {
      "epoch": 0.1488,
      "grad_norm": 0.4638438820838928,
      "learning_rate": 5.174497483512506e-06,
      "loss": 0.312,
      "step": 2700
    },
    {
      "epoch": 0.149,
      "grad_norm": 0.695012629032135,
      "learning_rate": 5.171008909182765e-06,
      "loss": 0.324,
      "step": 2701
    },
    {
      "epoch": 0.1492,
      "grad_norm": 0.5416454672813416,
      "learning_rate": 5.167520251505358e-06,
      "loss": 0.3838,
      "step": 2702
    },
    {
      "epoch": 0.1494,
      "grad_norm": 0.5360061526298523,
      "learning_rate": 5.164031512180616e-06,
      "loss": 0.3838,
      "step": 2703
    },
    {
      "epoch": 0.1496,
      "grad_norm": 0.5804004073143005,
      "learning_rate": 5.160542692908909e-06,
      "loss": 0.355,
      "step": 2704
    },
    {
      "epoch": 0.1498,
      "grad_norm": 1.042061686515808,
      "learning_rate": 5.157053795390642e-06,
      "loss": 0.3492,
      "step": 2705
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5061509609222412,
      "learning_rate": 5.153564821326265e-06,
      "loss": 0.3344,
      "step": 2706
    },
    {
      "epoch": 0.1502,
      "grad_norm": 0.4750489890575409,
      "learning_rate": 5.150075772416256e-06,
      "loss": 0.3325,
      "step": 2707
    },
    {
      "epoch": 0.1504,
      "grad_norm": 0.43213358521461487,
      "learning_rate": 5.146586650361143e-06,
      "loss": 0.2942,
      "step": 2708
    },
    {
      "epoch": 0.1506,
      "grad_norm": 0.4673366844654083,
      "learning_rate": 5.143097456861474e-06,
      "loss": 0.3144,
      "step": 2709
    },
    {
      "epoch": 0.1508,
      "grad_norm": 0.588631272315979,
      "learning_rate": 5.139608193617846e-06,
      "loss": 0.303,
      "step": 2710
    },
    {
      "epoch": 0.151,
      "grad_norm": 0.45653167366981506,
      "learning_rate": 5.136118862330876e-06,
      "loss": 0.3139,
      "step": 2711
    },
    {
      "epoch": 0.1512,
      "grad_norm": 0.6296796202659607,
      "learning_rate": 5.13262946470123e-06,
      "loss": 0.3643,
      "step": 2712
    },
    {
      "epoch": 0.1514,
      "grad_norm": 0.466459721326828,
      "learning_rate": 5.1291400024295946e-06,
      "loss": 0.3294,
      "step": 2713
    },
    {
      "epoch": 0.1516,
      "grad_norm": 0.5075957179069519,
      "learning_rate": 5.1256504772166885e-06,
      "loss": 0.3219,
      "step": 2714
    },
    {
      "epoch": 0.1518,
      "grad_norm": 0.5460566282272339,
      "learning_rate": 5.1221608907632665e-06,
      "loss": 0.3681,
      "step": 2715
    },
    {
      "epoch": 0.152,
      "grad_norm": 0.5663864612579346,
      "learning_rate": 5.118671244770111e-06,
      "loss": 0.3344,
      "step": 2716
    },
    {
      "epoch": 0.1522,
      "grad_norm": 0.5472123026847839,
      "learning_rate": 5.115181540938032e-06,
      "loss": 0.3582,
      "step": 2717
    },
    {
      "epoch": 0.1524,
      "grad_norm": 0.41221973299980164,
      "learning_rate": 5.111691780967869e-06,
      "loss": 0.3688,
      "step": 2718
    },
    {
      "epoch": 0.1526,
      "grad_norm": 0.5031256675720215,
      "learning_rate": 5.1082019665604895e-06,
      "loss": 0.2937,
      "step": 2719
    },
    {
      "epoch": 0.1528,
      "grad_norm": 0.4840245544910431,
      "learning_rate": 5.1047120994167855e-06,
      "loss": 0.3051,
      "step": 2720
    },
    {
      "epoch": 0.153,
      "grad_norm": 0.51397305727005,
      "learning_rate": 5.101222181237676e-06,
      "loss": 0.3432,
      "step": 2721
    },
    {
      "epoch": 0.1532,
      "grad_norm": 0.6550561189651489,
      "learning_rate": 5.097732213724107e-06,
      "loss": 0.3572,
      "step": 2722
    },
    {
      "epoch": 0.1534,
      "grad_norm": 0.5406279563903809,
      "learning_rate": 5.0942421985770415e-06,
      "loss": 0.3526,
      "step": 2723
    },
    {
      "epoch": 0.1536,
      "grad_norm": 0.4898490011692047,
      "learning_rate": 5.090752137497474e-06,
      "loss": 0.3375,
      "step": 2724
    },
    {
      "epoch": 0.1538,
      "grad_norm": 1.088559865951538,
      "learning_rate": 5.087262032186418e-06,
      "loss": 0.3135,
      "step": 2725
    },
    {
      "epoch": 0.154,
      "grad_norm": 0.49574047327041626,
      "learning_rate": 5.083771884344908e-06,
      "loss": 0.3135,
      "step": 2726
    },
    {
      "epoch": 0.1542,
      "grad_norm": 0.549225389957428,
      "learning_rate": 5.080281695673999e-06,
      "loss": 0.3293,
      "step": 2727
    },
    {
      "epoch": 0.1544,
      "grad_norm": 0.5106198191642761,
      "learning_rate": 5.0767914678747655e-06,
      "loss": 0.3652,
      "step": 2728
    },
    {
      "epoch": 0.1546,
      "grad_norm": 0.5626071691513062,
      "learning_rate": 5.073301202648304e-06,
      "loss": 0.3417,
      "step": 2729
    },
    {
      "epoch": 0.1548,
      "grad_norm": 0.509942352771759,
      "learning_rate": 5.069810901695727e-06,
      "loss": 0.3474,
      "step": 2730
    },
    {
      "epoch": 0.155,
      "grad_norm": 0.41371384263038635,
      "learning_rate": 5.066320566718165e-06,
      "loss": 0.3189,
      "step": 2731
    },
    {
      "epoch": 0.1552,
      "grad_norm": 0.5156335234642029,
      "learning_rate": 5.062830199416764e-06,
      "loss": 0.3459,
      "step": 2732
    },
    {
      "epoch": 0.1554,
      "grad_norm": 0.43150073289871216,
      "learning_rate": 5.059339801492687e-06,
      "loss": 0.3242,
      "step": 2733
    },
    {
      "epoch": 0.1556,
      "grad_norm": 0.6418101787567139,
      "learning_rate": 5.055849374647112e-06,
      "loss": 0.3534,
      "step": 2734
    },
    {
      "epoch": 0.1558,
      "grad_norm": 0.8004888892173767,
      "learning_rate": 5.05235892058123e-06,
      "loss": 0.3378,
      "step": 2735
    },
    {
      "epoch": 0.156,
      "grad_norm": 0.4805881679058075,
      "learning_rate": 5.048868440996246e-06,
      "loss": 0.3177,
      "step": 2736
    },
    {
      "epoch": 0.1562,
      "grad_norm": 0.5413293242454529,
      "learning_rate": 5.045377937593376e-06,
      "loss": 0.3413,
      "step": 2737
    },
    {
      "epoch": 0.1564,
      "grad_norm": 0.5448867082595825,
      "learning_rate": 5.041887412073853e-06,
      "loss": 0.3293,
      "step": 2738
    },
    {
      "epoch": 0.1566,
      "grad_norm": 0.4083445966243744,
      "learning_rate": 5.038396866138915e-06,
      "loss": 0.3388,
      "step": 2739
    },
    {
      "epoch": 0.1568,
      "grad_norm": 1.0386245250701904,
      "learning_rate": 5.034906301489808e-06,
      "loss": 0.3293,
      "step": 2740
    },
    {
      "epoch": 0.157,
      "grad_norm": 0.7240959405899048,
      "learning_rate": 5.031415719827796e-06,
      "loss": 0.3563,
      "step": 2741
    },
    {
      "epoch": 0.1572,
      "grad_norm": 0.4728566110134125,
      "learning_rate": 5.027925122854141e-06,
      "loss": 0.3428,
      "step": 2742
    },
    {
      "epoch": 0.1574,
      "grad_norm": 0.499901682138443,
      "learning_rate": 5.024434512270123e-06,
      "loss": 0.3409,
      "step": 2743
    },
    {
      "epoch": 0.1576,
      "grad_norm": 0.46928519010543823,
      "learning_rate": 5.0209438897770205e-06,
      "loss": 0.3433,
      "step": 2744
    },
    {
      "epoch": 0.1578,
      "grad_norm": 0.6434834003448486,
      "learning_rate": 5.0174532570761194e-06,
      "loss": 0.346,
      "step": 2745
    },
    {
      "epoch": 0.158,
      "grad_norm": 0.46378085017204285,
      "learning_rate": 5.013962615868714e-06,
      "loss": 0.3684,
      "step": 2746
    },
    {
      "epoch": 0.1582,
      "grad_norm": 0.5320935845375061,
      "learning_rate": 5.010471967856096e-06,
      "loss": 0.3274,
      "step": 2747
    },
    {
      "epoch": 0.1584,
      "grad_norm": 0.5785768032073975,
      "learning_rate": 5.006981314739573e-06,
      "loss": 0.3049,
      "step": 2748
    },
    {
      "epoch": 0.1586,
      "grad_norm": 0.9381781220436096,
      "learning_rate": 5.003490658220438e-06,
      "loss": 0.3585,
      "step": 2749
    },
    {
      "epoch": 0.1588,
      "grad_norm": 0.5034579634666443,
      "learning_rate": 5e-06,
      "loss": 0.3613,
      "step": 2750
    },
    {
      "epoch": 0.159,
      "grad_norm": 0.5371377468109131,
      "learning_rate": 4.996509341779563e-06,
      "loss": 0.3327,
      "step": 2751
    },
    {
      "epoch": 0.1592,
      "grad_norm": 0.5994710922241211,
      "learning_rate": 4.993018685260428e-06,
      "loss": 0.3188,
      "step": 2752
    },
    {
      "epoch": 0.1594,
      "grad_norm": 1.1557379961013794,
      "learning_rate": 4.9895280321439036e-06,
      "loss": 0.3707,
      "step": 2753
    },
    {
      "epoch": 0.1596,
      "grad_norm": 0.4540460407733917,
      "learning_rate": 4.986037384131288e-06,
      "loss": 0.3479,
      "step": 2754
    },
    {
      "epoch": 0.1598,
      "grad_norm": 0.45428234338760376,
      "learning_rate": 4.982546742923883e-06,
      "loss": 0.3225,
      "step": 2755
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5195954442024231,
      "learning_rate": 4.979056110222982e-06,
      "loss": 0.3618,
      "step": 2756
    },
    {
      "epoch": 0.1602,
      "grad_norm": 0.6687543988227844,
      "learning_rate": 4.975565487729879e-06,
      "loss": 0.3565,
      "step": 2757
    },
    {
      "epoch": 0.1604,
      "grad_norm": 0.5157641768455505,
      "learning_rate": 4.9720748771458595e-06,
      "loss": 0.3962,
      "step": 2758
    },
    {
      "epoch": 0.1606,
      "grad_norm": 0.47213029861450195,
      "learning_rate": 4.968584280172206e-06,
      "loss": 0.315,
      "step": 2759
    },
    {
      "epoch": 0.1608,
      "grad_norm": 0.6278468370437622,
      "learning_rate": 4.965093698510192e-06,
      "loss": 0.3587,
      "step": 2760
    },
    {
      "epoch": 0.161,
      "grad_norm": 0.963848888874054,
      "learning_rate": 4.961603133861086e-06,
      "loss": 0.3441,
      "step": 2761
    },
    {
      "epoch": 0.1612,
      "grad_norm": 0.5693170428276062,
      "learning_rate": 4.9581125879261476e-06,
      "loss": 0.3178,
      "step": 2762
    },
    {
      "epoch": 0.1614,
      "grad_norm": 0.4870203137397766,
      "learning_rate": 4.954622062406623e-06,
      "loss": 0.3017,
      "step": 2763
    },
    {
      "epoch": 0.1616,
      "grad_norm": 0.688448965549469,
      "learning_rate": 4.951131559003756e-06,
      "loss": 0.3846,
      "step": 2764
    },
    {
      "epoch": 0.1618,
      "grad_norm": 0.4391985237598419,
      "learning_rate": 4.9476410794187726e-06,
      "loss": 0.3383,
      "step": 2765
    },
    {
      "epoch": 0.162,
      "grad_norm": 0.47475066781044006,
      "learning_rate": 4.94415062535289e-06,
      "loss": 0.3133,
      "step": 2766
    },
    {
      "epoch": 0.1622,
      "grad_norm": 0.5285060405731201,
      "learning_rate": 4.940660198507315e-06,
      "loss": 0.2994,
      "step": 2767
    },
    {
      "epoch": 0.1624,
      "grad_norm": 0.5225185751914978,
      "learning_rate": 4.937169800583237e-06,
      "loss": 0.3524,
      "step": 2768
    },
    {
      "epoch": 0.1626,
      "grad_norm": 0.45218998193740845,
      "learning_rate": 4.933679433281837e-06,
      "loss": 0.3385,
      "step": 2769
    },
    {
      "epoch": 0.1628,
      "grad_norm": 0.41330286860466003,
      "learning_rate": 4.9301890983042744e-06,
      "loss": 0.3683,
      "step": 2770
    },
    {
      "epoch": 0.163,
      "grad_norm": 0.5176935195922852,
      "learning_rate": 4.926698797351697e-06,
      "loss": 0.3204,
      "step": 2771
    },
    {
      "epoch": 0.1632,
      "grad_norm": 0.4887668192386627,
      "learning_rate": 4.923208532125235e-06,
      "loss": 0.3294,
      "step": 2772
    },
    {
      "epoch": 0.1634,
      "grad_norm": 0.43265339732170105,
      "learning_rate": 4.9197183043260035e-06,
      "loss": 0.3335,
      "step": 2773
    },
    {
      "epoch": 0.1636,
      "grad_norm": 0.5498007535934448,
      "learning_rate": 4.9162281156550945e-06,
      "loss": 0.3753,
      "step": 2774
    },
    {
      "epoch": 0.1638,
      "grad_norm": 0.5638997554779053,
      "learning_rate": 4.9127379678135825e-06,
      "loss": 0.3338,
      "step": 2775
    },
    {
      "epoch": 0.164,
      "grad_norm": 0.44432753324508667,
      "learning_rate": 4.9092478625025266e-06,
      "loss": 0.3547,
      "step": 2776
    },
    {
      "epoch": 0.1642,
      "grad_norm": 0.6725711822509766,
      "learning_rate": 4.90575780142296e-06,
      "loss": 0.3709,
      "step": 2777
    },
    {
      "epoch": 0.1644,
      "grad_norm": 0.5566431879997253,
      "learning_rate": 4.902267786275895e-06,
      "loss": 0.3393,
      "step": 2778
    },
    {
      "epoch": 0.1646,
      "grad_norm": 0.4478459656238556,
      "learning_rate": 4.898777818762325e-06,
      "loss": 0.3537,
      "step": 2779
    },
    {
      "epoch": 0.1648,
      "grad_norm": 0.43942561745643616,
      "learning_rate": 4.895287900583216e-06,
      "loss": 0.3224,
      "step": 2780
    },
    {
      "epoch": 0.165,
      "grad_norm": 0.5956069827079773,
      "learning_rate": 4.891798033439511e-06,
      "loss": 0.3564,
      "step": 2781
    },
    {
      "epoch": 0.1652,
      "grad_norm": 0.3947772681713104,
      "learning_rate": 4.888308219032133e-06,
      "loss": 0.3419,
      "step": 2782
    },
    {
      "epoch": 0.1654,
      "grad_norm": 0.5991208553314209,
      "learning_rate": 4.88481845906197e-06,
      "loss": 0.3428,
      "step": 2783
    },
    {
      "epoch": 0.1656,
      "grad_norm": 0.5012426972389221,
      "learning_rate": 4.881328755229892e-06,
      "loss": 0.2983,
      "step": 2784
    },
    {
      "epoch": 0.1658,
      "grad_norm": 0.5035091638565063,
      "learning_rate": 4.877839109236735e-06,
      "loss": 0.3569,
      "step": 2785
    },
    {
      "epoch": 0.166,
      "grad_norm": 0.5270794630050659,
      "learning_rate": 4.874349522783313e-06,
      "loss": 0.3551,
      "step": 2786
    },
    {
      "epoch": 0.1662,
      "grad_norm": 0.42647212743759155,
      "learning_rate": 4.870859997570407e-06,
      "loss": 0.3251,
      "step": 2787
    },
    {
      "epoch": 0.1664,
      "grad_norm": 0.8613443970680237,
      "learning_rate": 4.86737053529877e-06,
      "loss": 0.3095,
      "step": 2788
    },
    {
      "epoch": 0.1666,
      "grad_norm": 0.4875284731388092,
      "learning_rate": 4.863881137669123e-06,
      "loss": 0.311,
      "step": 2789
    },
    {
      "epoch": 0.1668,
      "grad_norm": 0.4984796643257141,
      "learning_rate": 4.860391806382157e-06,
      "loss": 0.3565,
      "step": 2790
    },
    {
      "epoch": 0.167,
      "grad_norm": 0.3951871395111084,
      "learning_rate": 4.856902543138528e-06,
      "loss": 0.3161,
      "step": 2791
    },
    {
      "epoch": 0.1672,
      "grad_norm": 0.5198420882225037,
      "learning_rate": 4.853413349638859e-06,
      "loss": 0.3495,
      "step": 2792
    },
    {
      "epoch": 0.1674,
      "grad_norm": 0.40698620676994324,
      "learning_rate": 4.8499242275837444e-06,
      "loss": 0.3294,
      "step": 2793
    },
    {
      "epoch": 0.1676,
      "grad_norm": 0.4249068796634674,
      "learning_rate": 4.846435178673737e-06,
      "loss": 0.3425,
      "step": 2794
    },
    {
      "epoch": 0.1678,
      "grad_norm": 0.45311668515205383,
      "learning_rate": 4.842946204609359e-06,
      "loss": 0.3061,
      "step": 2795
    },
    {
      "epoch": 0.168,
      "grad_norm": 0.4585548937320709,
      "learning_rate": 4.839457307091093e-06,
      "loss": 0.3255,
      "step": 2796
    },
    {
      "epoch": 0.1682,
      "grad_norm": 0.5110889077186584,
      "learning_rate": 4.835968487819384e-06,
      "loss": 0.3573,
      "step": 2797
    },
    {
      "epoch": 0.1684,
      "grad_norm": 0.46775826811790466,
      "learning_rate": 4.832479748494643e-06,
      "loss": 0.3354,
      "step": 2798
    },
    {
      "epoch": 0.1686,
      "grad_norm": 0.4752473831176758,
      "learning_rate": 4.828991090817238e-06,
      "loss": 0.3469,
      "step": 2799
    },
    {
      "epoch": 0.1688,
      "grad_norm": 0.5425744652748108,
      "learning_rate": 4.825502516487497e-06,
      "loss": 0.3321,
      "step": 2800
    },
    {
      "epoch": 0.169,
      "grad_norm": 0.4397556781768799,
      "learning_rate": 4.822014027205708e-06,
      "loss": 0.3279,
      "step": 2801
    },
    {
      "epoch": 0.1692,
      "grad_norm": 0.46849557757377625,
      "learning_rate": 4.818525624672122e-06,
      "loss": 0.3778,
      "step": 2802
    },
    {
      "epoch": 0.1694,
      "grad_norm": 0.5096936821937561,
      "learning_rate": 4.815037310586941e-06,
      "loss": 0.3269,
      "step": 2803
    },
    {
      "epoch": 0.1696,
      "grad_norm": 0.4732552766799927,
      "learning_rate": 4.811549086650327e-06,
      "loss": 0.3545,
      "step": 2804
    },
    {
      "epoch": 0.1698,
      "grad_norm": 0.43264511227607727,
      "learning_rate": 4.8080609545624004e-06,
      "loss": 0.3355,
      "step": 2805
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.47374945878982544,
      "learning_rate": 4.8045729160232326e-06,
      "loss": 0.3374,
      "step": 2806
    },
    {
      "epoch": 0.1702,
      "grad_norm": 1.532331943511963,
      "learning_rate": 4.801084972732851e-06,
      "loss": 0.3462,
      "step": 2807
    },
    {
      "epoch": 0.1704,
      "grad_norm": 0.451113224029541,
      "learning_rate": 4.797597126391238e-06,
      "loss": 0.309,
      "step": 2808
    },
    {
      "epoch": 0.1706,
      "grad_norm": 0.612220048904419,
      "learning_rate": 4.794109378698327e-06,
      "loss": 0.351,
      "step": 2809
    },
    {
      "epoch": 0.1708,
      "grad_norm": 0.5635141134262085,
      "learning_rate": 4.7906217313540035e-06,
      "loss": 0.3477,
      "step": 2810
    },
    {
      "epoch": 0.171,
      "grad_norm": 0.422490656375885,
      "learning_rate": 4.787134186058103e-06,
      "loss": 0.3499,
      "step": 2811
    },
    {
      "epoch": 0.1712,
      "grad_norm": 0.623266875743866,
      "learning_rate": 4.783646744510416e-06,
      "loss": 0.3358,
      "step": 2812
    },
    {
      "epoch": 0.1714,
      "grad_norm": 0.5417162179946899,
      "learning_rate": 4.780159408410677e-06,
      "loss": 0.3185,
      "step": 2813
    },
    {
      "epoch": 0.1716,
      "grad_norm": 0.4613378047943115,
      "learning_rate": 4.7766721794585704e-06,
      "loss": 0.3332,
      "step": 2814
    },
    {
      "epoch": 0.1718,
      "grad_norm": 0.6941378116607666,
      "learning_rate": 4.7731850593537316e-06,
      "loss": 0.3651,
      "step": 2815
    },
    {
      "epoch": 0.172,
      "grad_norm": 0.6017017960548401,
      "learning_rate": 4.769698049795739e-06,
      "loss": 0.3315,
      "step": 2816
    },
    {
      "epoch": 0.1722,
      "grad_norm": 0.47116920351982117,
      "learning_rate": 4.766211152484122e-06,
      "loss": 0.3442,
      "step": 2817
    },
    {
      "epoch": 0.1724,
      "grad_norm": 0.5007385015487671,
      "learning_rate": 4.762724369118346e-06,
      "loss": 0.3618,
      "step": 2818
    },
    {
      "epoch": 0.1726,
      "grad_norm": 0.4642718732357025,
      "learning_rate": 4.759237701397831e-06,
      "loss": 0.3454,
      "step": 2819
    },
    {
      "epoch": 0.1728,
      "grad_norm": 0.6436984539031982,
      "learning_rate": 4.755751151021934e-06,
      "loss": 0.3388,
      "step": 2820
    },
    {
      "epoch": 0.173,
      "grad_norm": 0.637212336063385,
      "learning_rate": 4.752264719689961e-06,
      "loss": 0.3766,
      "step": 2821
    },
    {
      "epoch": 0.1732,
      "grad_norm": 0.4384714365005493,
      "learning_rate": 4.748778409101153e-06,
      "loss": 0.3344,
      "step": 2822
    },
    {
      "epoch": 0.1734,
      "grad_norm": 0.4870780408382416,
      "learning_rate": 4.745292220954696e-06,
      "loss": 0.3619,
      "step": 2823
    },
    {
      "epoch": 0.1736,
      "grad_norm": 0.8944578766822815,
      "learning_rate": 4.741806156949718e-06,
      "loss": 0.3558,
      "step": 2824
    },
    {
      "epoch": 0.1738,
      "grad_norm": 0.47908714413642883,
      "learning_rate": 4.738320218785281e-06,
      "loss": 0.341,
      "step": 2825
    },
    {
      "epoch": 0.174,
      "grad_norm": 0.4000164270401001,
      "learning_rate": 4.734834408160393e-06,
      "loss": 0.2887,
      "step": 2826
    },
    {
      "epoch": 0.1742,
      "grad_norm": 0.43930408358573914,
      "learning_rate": 4.73134872677399e-06,
      "loss": 0.3465,
      "step": 2827
    },
    {
      "epoch": 0.1744,
      "grad_norm": 0.7952002882957458,
      "learning_rate": 4.727863176324955e-06,
      "loss": 0.3109,
      "step": 2828
    },
    {
      "epoch": 0.1746,
      "grad_norm": 0.508614718914032,
      "learning_rate": 4.7243777585121034e-06,
      "loss": 0.3649,
      "step": 2829
    },
    {
      "epoch": 0.1748,
      "grad_norm": 0.6912397742271423,
      "learning_rate": 4.720892475034181e-06,
      "loss": 0.3379,
      "step": 2830
    },
    {
      "epoch": 0.175,
      "grad_norm": 0.5120217204093933,
      "learning_rate": 4.717407327589878e-06,
      "loss": 0.3312,
      "step": 2831
    },
    {
      "epoch": 0.1752,
      "grad_norm": 0.5083125829696655,
      "learning_rate": 4.71392231787781e-06,
      "loss": 0.3027,
      "step": 2832
    },
    {
      "epoch": 0.1754,
      "grad_norm": 0.45233604311943054,
      "learning_rate": 4.710437447596528e-06,
      "loss": 0.3347,
      "step": 2833
    },
    {
      "epoch": 0.1756,
      "grad_norm": 0.4547630548477173,
      "learning_rate": 4.706952718444518e-06,
      "loss": 0.3595,
      "step": 2834
    },
    {
      "epoch": 0.1758,
      "grad_norm": 0.5515077710151672,
      "learning_rate": 4.703468132120193e-06,
      "loss": 0.3563,
      "step": 2835
    },
    {
      "epoch": 0.176,
      "grad_norm": 0.4674878418445587,
      "learning_rate": 4.699983690321898e-06,
      "loss": 0.3528,
      "step": 2836
    },
    {
      "epoch": 0.1762,
      "grad_norm": 0.4262967109680176,
      "learning_rate": 4.696499394747906e-06,
      "loss": 0.3315,
      "step": 2837
    },
    {
      "epoch": 0.1764,
      "grad_norm": 0.7096832990646362,
      "learning_rate": 4.693015247096423e-06,
      "loss": 0.3312,
      "step": 2838
    },
    {
      "epoch": 0.1766,
      "grad_norm": 0.4787069261074066,
      "learning_rate": 4.689531249065581e-06,
      "loss": 0.313,
      "step": 2839
    },
    {
      "epoch": 0.1768,
      "grad_norm": 0.43516138195991516,
      "learning_rate": 4.686047402353433e-06,
      "loss": 0.3129,
      "step": 2840
    },
    {
      "epoch": 0.177,
      "grad_norm": 0.4113995432853699,
      "learning_rate": 4.68256370865797e-06,
      "loss": 0.3204,
      "step": 2841
    },
    {
      "epoch": 0.1772,
      "grad_norm": 0.5329024195671082,
      "learning_rate": 4.679080169677097e-06,
      "loss": 0.325,
      "step": 2842
    },
    {
      "epoch": 0.1774,
      "grad_norm": 0.4149605631828308,
      "learning_rate": 4.675596787108652e-06,
      "loss": 0.3098,
      "step": 2843
    },
    {
      "epoch": 0.1776,
      "grad_norm": 0.4084031879901886,
      "learning_rate": 4.672113562650394e-06,
      "loss": 0.3306,
      "step": 2844
    },
    {
      "epoch": 0.1778,
      "grad_norm": 0.498797208070755,
      "learning_rate": 4.668630498000001e-06,
      "loss": 0.3032,
      "step": 2845
    },
    {
      "epoch": 0.178,
      "grad_norm": 0.5218239426612854,
      "learning_rate": 4.6651475948550765e-06,
      "loss": 0.3499,
      "step": 2846
    },
    {
      "epoch": 0.1782,
      "grad_norm": 0.44862449169158936,
      "learning_rate": 4.661664854913147e-06,
      "loss": 0.3138,
      "step": 2847
    },
    {
      "epoch": 0.1784,
      "grad_norm": 0.40615928173065186,
      "learning_rate": 4.658182279871657e-06,
      "loss": 0.3328,
      "step": 2848
    },
    {
      "epoch": 0.1786,
      "grad_norm": 0.5752589702606201,
      "learning_rate": 4.654699871427972e-06,
      "loss": 0.338,
      "step": 2849
    },
    {
      "epoch": 0.1788,
      "grad_norm": 0.6078992486000061,
      "learning_rate": 4.651217631279374e-06,
      "loss": 0.3793,
      "step": 2850
    },
    {
      "epoch": 0.179,
      "grad_norm": 0.5702582001686096,
      "learning_rate": 4.6477355611230655e-06,
      "loss": 0.3499,
      "step": 2851
    },
    {
      "epoch": 0.1792,
      "grad_norm": 0.39062824845314026,
      "learning_rate": 4.644253662656167e-06,
      "loss": 0.3096,
      "step": 2852
    },
    {
      "epoch": 0.1794,
      "grad_norm": 0.5378992557525635,
      "learning_rate": 4.6407719375757095e-06,
      "loss": 0.3593,
      "step": 2853
    },
    {
      "epoch": 0.1796,
      "grad_norm": 0.4418787956237793,
      "learning_rate": 4.637290387578647e-06,
      "loss": 0.3521,
      "step": 2854
    },
    {
      "epoch": 0.1798,
      "grad_norm": 0.4857812523841858,
      "learning_rate": 4.6338090143618435e-06,
      "loss": 0.3457,
      "step": 2855
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.550596296787262,
      "learning_rate": 4.630327819622076e-06,
      "loss": 0.3518,
      "step": 2856
    },
    {
      "epoch": 0.1802,
      "grad_norm": 0.486069917678833,
      "learning_rate": 4.6268468050560394e-06,
      "loss": 0.3168,
      "step": 2857
    },
    {
      "epoch": 0.1804,
      "grad_norm": 0.45717376470565796,
      "learning_rate": 4.6233659723603374e-06,
      "loss": 0.3332,
      "step": 2858
    },
    {
      "epoch": 0.1806,
      "grad_norm": 0.5810399651527405,
      "learning_rate": 4.619885323231484e-06,
      "loss": 0.3444,
      "step": 2859
    },
    {
      "epoch": 0.1808,
      "grad_norm": 0.5325894355773926,
      "learning_rate": 4.6164048593659076e-06,
      "loss": 0.3498,
      "step": 2860
    },
    {
      "epoch": 0.181,
      "grad_norm": 0.4979740083217621,
      "learning_rate": 4.612924582459943e-06,
      "loss": 0.3539,
      "step": 2861
    },
    {
      "epoch": 0.1812,
      "grad_norm": 0.4405795633792877,
      "learning_rate": 4.609444494209834e-06,
      "loss": 0.3384,
      "step": 2862
    },
    {
      "epoch": 0.1814,
      "grad_norm": 0.4628649950027466,
      "learning_rate": 4.605964596311733e-06,
      "loss": 0.3608,
      "step": 2863
    },
    {
      "epoch": 0.1816,
      "grad_norm": 0.42742249369621277,
      "learning_rate": 4.602484890461702e-06,
      "loss": 0.323,
      "step": 2864
    },
    {
      "epoch": 0.1818,
      "grad_norm": 0.49953269958496094,
      "learning_rate": 4.5990053783557066e-06,
      "loss": 0.3206,
      "step": 2865
    },
    {
      "epoch": 0.182,
      "grad_norm": 0.4373273253440857,
      "learning_rate": 4.595526061689617e-06,
      "loss": 0.335,
      "step": 2866
    },
    {
      "epoch": 0.1822,
      "grad_norm": 0.5037556886672974,
      "learning_rate": 4.592046942159213e-06,
      "loss": 0.3262,
      "step": 2867
    },
    {
      "epoch": 0.1824,
      "grad_norm": 0.44238024950027466,
      "learning_rate": 4.588568021460172e-06,
      "loss": 0.3208,
      "step": 2868
    },
    {
      "epoch": 0.1826,
      "grad_norm": 0.4674498736858368,
      "learning_rate": 4.5850893012880806e-06,
      "loss": 0.3577,
      "step": 2869
    },
    {
      "epoch": 0.1828,
      "grad_norm": 0.6076616644859314,
      "learning_rate": 4.581610783338424e-06,
      "loss": 0.3355,
      "step": 2870
    },
    {
      "epoch": 0.183,
      "grad_norm": 0.4424092471599579,
      "learning_rate": 4.578132469306588e-06,
      "loss": 0.331,
      "step": 2871
    },
    {
      "epoch": 0.1832,
      "grad_norm": 0.6945018768310547,
      "learning_rate": 4.57465436088786e-06,
      "loss": 0.3603,
      "step": 2872
    },
    {
      "epoch": 0.1834,
      "grad_norm": 0.4556240737438202,
      "learning_rate": 4.571176459777431e-06,
      "loss": 0.3281,
      "step": 2873
    },
    {
      "epoch": 0.1836,
      "grad_norm": 0.46061187982559204,
      "learning_rate": 4.5676987676703865e-06,
      "loss": 0.3367,
      "step": 2874
    },
    {
      "epoch": 0.1838,
      "grad_norm": 0.516086757183075,
      "learning_rate": 4.564221286261709e-06,
      "loss": 0.3373,
      "step": 2875
    },
    {
      "epoch": 0.184,
      "grad_norm": 0.4440682530403137,
      "learning_rate": 4.560744017246284e-06,
      "loss": 0.3748,
      "step": 2876
    },
    {
      "epoch": 0.1842,
      "grad_norm": 0.6073085069656372,
      "learning_rate": 4.557266962318889e-06,
      "loss": 0.3447,
      "step": 2877
    },
    {
      "epoch": 0.1844,
      "grad_norm": 0.48278841376304626,
      "learning_rate": 4.553790123174198e-06,
      "loss": 0.3601,
      "step": 2878
    },
    {
      "epoch": 0.1846,
      "grad_norm": 0.44356998801231384,
      "learning_rate": 4.5503135015067815e-06,
      "loss": 0.3181,
      "step": 2879
    },
    {
      "epoch": 0.1848,
      "grad_norm": 0.4226277768611908,
      "learning_rate": 4.546837099011101e-06,
      "loss": 0.3319,
      "step": 2880
    },
    {
      "epoch": 0.185,
      "grad_norm": 0.474657267332077,
      "learning_rate": 4.543360917381512e-06,
      "loss": 0.3667,
      "step": 2881
    },
    {
      "epoch": 0.1852,
      "grad_norm": 0.4869779050350189,
      "learning_rate": 4.539884958312265e-06,
      "loss": 0.3662,
      "step": 2882
    },
    {
      "epoch": 0.1854,
      "grad_norm": 0.5020691752433777,
      "learning_rate": 4.5364092234975e-06,
      "loss": 0.3463,
      "step": 2883
    },
    {
      "epoch": 0.1856,
      "grad_norm": 0.5002124905586243,
      "learning_rate": 4.532933714631248e-06,
      "loss": 0.3241,
      "step": 2884
    },
    {
      "epoch": 0.1858,
      "grad_norm": 0.4193861484527588,
      "learning_rate": 4.529458433407429e-06,
      "loss": 0.3353,
      "step": 2885
    },
    {
      "epoch": 0.186,
      "grad_norm": 0.4058714807033539,
      "learning_rate": 4.525983381519853e-06,
      "loss": 0.3623,
      "step": 2886
    },
    {
      "epoch": 0.1862,
      "grad_norm": 0.49489203095436096,
      "learning_rate": 4.522508560662219e-06,
      "loss": 0.343,
      "step": 2887
    },
    {
      "epoch": 0.1864,
      "grad_norm": 0.8802083134651184,
      "learning_rate": 4.519033972528114e-06,
      "loss": 0.3483,
      "step": 2888
    },
    {
      "epoch": 0.1866,
      "grad_norm": 0.4132431745529175,
      "learning_rate": 4.5155596188110055e-06,
      "loss": 0.3456,
      "step": 2889
    },
    {
      "epoch": 0.1868,
      "grad_norm": 0.4433412551879883,
      "learning_rate": 4.512085501204254e-06,
      "loss": 0.3228,
      "step": 2890
    },
    {
      "epoch": 0.187,
      "grad_norm": 0.5278496742248535,
      "learning_rate": 4.508611621401102e-06,
      "loss": 0.3708,
      "step": 2891
    },
    {
      "epoch": 0.1872,
      "grad_norm": 0.5224753618240356,
      "learning_rate": 4.505137981094675e-06,
      "loss": 0.3714,
      "step": 2892
    },
    {
      "epoch": 0.1874,
      "grad_norm": 0.529327392578125,
      "learning_rate": 4.5016645819779865e-06,
      "loss": 0.3013,
      "step": 2893
    },
    {
      "epoch": 0.1876,
      "grad_norm": 0.4391004741191864,
      "learning_rate": 4.4981914257439254e-06,
      "loss": 0.2944,
      "step": 2894
    },
    {
      "epoch": 0.1878,
      "grad_norm": 0.39296072721481323,
      "learning_rate": 4.494718514085269e-06,
      "loss": 0.3245,
      "step": 2895
    },
    {
      "epoch": 0.188,
      "grad_norm": 2.009363889694214,
      "learning_rate": 4.491245848694669e-06,
      "loss": 0.321,
      "step": 2896
    },
    {
      "epoch": 0.1882,
      "grad_norm": 0.5172106027603149,
      "learning_rate": 4.487773431264664e-06,
      "loss": 0.3166,
      "step": 2897
    },
    {
      "epoch": 0.1884,
      "grad_norm": 0.43465888500213623,
      "learning_rate": 4.484301263487664e-06,
      "loss": 0.3268,
      "step": 2898
    },
    {
      "epoch": 0.1886,
      "grad_norm": 0.4351344406604767,
      "learning_rate": 4.4808293470559645e-06,
      "loss": 0.3462,
      "step": 2899
    },
    {
      "epoch": 0.1888,
      "grad_norm": 0.4631686508655548,
      "learning_rate": 4.477357683661734e-06,
      "loss": 0.3222,
      "step": 2900
    },
    {
      "epoch": 0.189,
      "grad_norm": 0.4197503626346588,
      "learning_rate": 4.473886274997018e-06,
      "loss": 0.3145,
      "step": 2901
    },
    {
      "epoch": 0.1892,
      "grad_norm": 0.5209789872169495,
      "learning_rate": 4.470415122753742e-06,
      "loss": 0.3298,
      "step": 2902
    },
    {
      "epoch": 0.1894,
      "grad_norm": 0.4882582724094391,
      "learning_rate": 4.466944228623701e-06,
      "loss": 0.331,
      "step": 2903
    },
    {
      "epoch": 0.1896,
      "grad_norm": 1.1147918701171875,
      "learning_rate": 4.463473594298567e-06,
      "loss": 0.3356,
      "step": 2904
    },
    {
      "epoch": 0.1898,
      "grad_norm": 0.575063169002533,
      "learning_rate": 4.460003221469886e-06,
      "loss": 0.3768,
      "step": 2905
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5040742754936218,
      "learning_rate": 4.456533111829076e-06,
      "loss": 0.3533,
      "step": 2906
    },
    {
      "epoch": 0.1902,
      "grad_norm": 0.39840561151504517,
      "learning_rate": 4.453063267067424e-06,
      "loss": 0.3088,
      "step": 2907
    },
    {
      "epoch": 0.1904,
      "grad_norm": 0.4132383465766907,
      "learning_rate": 4.44959368887609e-06,
      "loss": 0.299,
      "step": 2908
    },
    {
      "epoch": 0.1906,
      "grad_norm": 0.5496099591255188,
      "learning_rate": 4.446124378946108e-06,
      "loss": 0.3652,
      "step": 2909
    },
    {
      "epoch": 0.1908,
      "grad_norm": 0.5613524317741394,
      "learning_rate": 4.442655338968373e-06,
      "loss": 0.3445,
      "step": 2910
    },
    {
      "epoch": 0.191,
      "grad_norm": 1.4753385782241821,
      "learning_rate": 4.439186570633656e-06,
      "loss": 0.3487,
      "step": 2911
    },
    {
      "epoch": 0.1912,
      "grad_norm": 0.4947108328342438,
      "learning_rate": 4.4357180756325915e-06,
      "loss": 0.3279,
      "step": 2912
    },
    {
      "epoch": 0.1914,
      "grad_norm": 0.7357821464538574,
      "learning_rate": 4.432249855655681e-06,
      "loss": 0.3313,
      "step": 2913
    },
    {
      "epoch": 0.1916,
      "grad_norm": 0.5347052216529846,
      "learning_rate": 4.428781912393299e-06,
      "loss": 0.3895,
      "step": 2914
    },
    {
      "epoch": 0.1918,
      "grad_norm": 0.49243417382240295,
      "learning_rate": 4.425314247535668e-06,
      "loss": 0.3404,
      "step": 2915
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.43118932843208313,
      "learning_rate": 4.4218468627728935e-06,
      "loss": 0.3562,
      "step": 2916
    },
    {
      "epoch": 0.1922,
      "grad_norm": 0.5088269114494324,
      "learning_rate": 4.418379759794934e-06,
      "loss": 0.3948,
      "step": 2917
    },
    {
      "epoch": 0.1924,
      "grad_norm": 0.5344895720481873,
      "learning_rate": 4.414912940291614e-06,
      "loss": 0.347,
      "step": 2918
    },
    {
      "epoch": 0.1926,
      "grad_norm": 0.4394966959953308,
      "learning_rate": 4.4114464059526185e-06,
      "loss": 0.3468,
      "step": 2919
    },
    {
      "epoch": 0.1928,
      "grad_norm": 0.4084779918193817,
      "learning_rate": 4.4079801584674955e-06,
      "loss": 0.3114,
      "step": 2920
    },
    {
      "epoch": 0.193,
      "grad_norm": 0.5402647852897644,
      "learning_rate": 4.404514199525651e-06,
      "loss": 0.329,
      "step": 2921
    },
    {
      "epoch": 0.1932,
      "grad_norm": 0.6367583274841309,
      "learning_rate": 4.401048530816353e-06,
      "loss": 0.355,
      "step": 2922
    },
    {
      "epoch": 0.1934,
      "grad_norm": 0.4981566071510315,
      "learning_rate": 4.397583154028725e-06,
      "loss": 0.3179,
      "step": 2923
    },
    {
      "epoch": 0.1936,
      "grad_norm": 0.48709455132484436,
      "learning_rate": 4.394118070851749e-06,
      "loss": 0.351,
      "step": 2924
    },
    {
      "epoch": 0.1938,
      "grad_norm": 0.5187572240829468,
      "learning_rate": 4.390653282974264e-06,
      "loss": 0.3248,
      "step": 2925
    },
    {
      "epoch": 0.194,
      "grad_norm": 0.43628832697868347,
      "learning_rate": 4.387188792084967e-06,
      "loss": 0.3224,
      "step": 2926
    },
    {
      "epoch": 0.1942,
      "grad_norm": 1.4704705476760864,
      "learning_rate": 4.383724599872407e-06,
      "loss": 0.3561,
      "step": 2927
    },
    {
      "epoch": 0.1944,
      "grad_norm": 0.4714643061161041,
      "learning_rate": 4.380260708024991e-06,
      "loss": 0.3566,
      "step": 2928
    },
    {
      "epoch": 0.1946,
      "grad_norm": 0.4817892909049988,
      "learning_rate": 4.376797118230978e-06,
      "loss": 0.3783,
      "step": 2929
    },
    {
      "epoch": 0.1948,
      "grad_norm": 0.48850056529045105,
      "learning_rate": 4.373333832178478e-06,
      "loss": 0.303,
      "step": 2930
    },
    {
      "epoch": 0.195,
      "grad_norm": 0.5852121114730835,
      "learning_rate": 4.369870851555457e-06,
      "loss": 0.3446,
      "step": 2931
    },
    {
      "epoch": 0.1952,
      "grad_norm": 0.5756850838661194,
      "learning_rate": 4.366408178049728e-06,
      "loss": 0.3424,
      "step": 2932
    },
    {
      "epoch": 0.1954,
      "grad_norm": 0.5188869833946228,
      "learning_rate": 4.362945813348956e-06,
      "loss": 0.3373,
      "step": 2933
    },
    {
      "epoch": 0.1956,
      "grad_norm": 0.3679812550544739,
      "learning_rate": 4.359483759140654e-06,
      "loss": 0.2843,
      "step": 2934
    },
    {
      "epoch": 0.1958,
      "grad_norm": 0.4158618152141571,
      "learning_rate": 4.356022017112187e-06,
      "loss": 0.3046,
      "step": 2935
    },
    {
      "epoch": 0.196,
      "grad_norm": 0.5158591866493225,
      "learning_rate": 4.352560588950766e-06,
      "loss": 0.3773,
      "step": 2936
    },
    {
      "epoch": 0.1962,
      "grad_norm": 0.4254622459411621,
      "learning_rate": 4.349099476343448e-06,
      "loss": 0.3342,
      "step": 2937
    },
    {
      "epoch": 0.1964,
      "grad_norm": 0.40165627002716064,
      "learning_rate": 4.34563868097714e-06,
      "loss": 0.3035,
      "step": 2938
    },
    {
      "epoch": 0.1966,
      "grad_norm": 0.40249085426330566,
      "learning_rate": 4.342178204538588e-06,
      "loss": 0.3297,
      "step": 2939
    },
    {
      "epoch": 0.1968,
      "grad_norm": 0.470462828874588,
      "learning_rate": 4.3387180487143875e-06,
      "loss": 0.3553,
      "step": 2940
    },
    {
      "epoch": 0.197,
      "grad_norm": 0.44463875889778137,
      "learning_rate": 4.335258215190979e-06,
      "loss": 0.3321,
      "step": 2941
    },
    {
      "epoch": 0.1972,
      "grad_norm": 0.48316147923469543,
      "learning_rate": 4.331798705654639e-06,
      "loss": 0.3134,
      "step": 2942
    },
    {
      "epoch": 0.1974,
      "grad_norm": 0.49824702739715576,
      "learning_rate": 4.328339521791493e-06,
      "loss": 0.3481,
      "step": 2943
    },
    {
      "epoch": 0.1976,
      "grad_norm": 0.4668271243572235,
      "learning_rate": 4.3248806652875045e-06,
      "loss": 0.3179,
      "step": 2944
    },
    {
      "epoch": 0.1978,
      "grad_norm": 0.4262327551841736,
      "learning_rate": 4.321422137828479e-06,
      "loss": 0.3242,
      "step": 2945
    },
    {
      "epoch": 0.198,
      "grad_norm": 0.47191479802131653,
      "learning_rate": 4.317963941100059e-06,
      "loss": 0.3502,
      "step": 2946
    },
    {
      "epoch": 0.1982,
      "grad_norm": 0.5163655877113342,
      "learning_rate": 4.314506076787729e-06,
      "loss": 0.3386,
      "step": 2947
    },
    {
      "epoch": 0.1984,
      "grad_norm": 0.4475741684436798,
      "learning_rate": 4.31104854657681e-06,
      "loss": 0.3244,
      "step": 2948
    },
    {
      "epoch": 0.1986,
      "grad_norm": 0.41065025329589844,
      "learning_rate": 4.307591352152459e-06,
      "loss": 0.3722,
      "step": 2949
    },
    {
      "epoch": 0.1988,
      "grad_norm": 0.4288010597229004,
      "learning_rate": 4.304134495199675e-06,
      "loss": 0.3144,
      "step": 2950
    },
    {
      "epoch": 0.199,
      "grad_norm": 0.49918246269226074,
      "learning_rate": 4.300677977403281e-06,
      "loss": 0.343,
      "step": 2951
    },
    {
      "epoch": 0.1992,
      "grad_norm": 0.6938134431838989,
      "learning_rate": 4.297221800447946e-06,
      "loss": 0.3442,
      "step": 2952
    },
    {
      "epoch": 0.1994,
      "grad_norm": 0.4022809565067291,
      "learning_rate": 4.293765966018167e-06,
      "loss": 0.3409,
      "step": 2953
    },
    {
      "epoch": 0.1996,
      "grad_norm": 0.38700851798057556,
      "learning_rate": 4.290310475798278e-06,
      "loss": 0.338,
      "step": 2954
    },
    {
      "epoch": 0.1998,
      "grad_norm": 0.419647753238678,
      "learning_rate": 4.286855331472442e-06,
      "loss": 0.3057,
      "step": 2955
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.48301559686660767,
      "learning_rate": 4.283400534724654e-06,
      "loss": 0.3408,
      "step": 2956
    },
    {
      "epoch": 0.2002,
      "grad_norm": 0.4510580897331238,
      "learning_rate": 4.279946087238739e-06,
      "loss": 0.3478,
      "step": 2957
    },
    {
      "epoch": 0.2004,
      "grad_norm": 0.5144090056419373,
      "learning_rate": 4.2764919906983545e-06,
      "loss": 0.3074,
      "step": 2958
    },
    {
      "epoch": 0.2006,
      "grad_norm": 0.5783217549324036,
      "learning_rate": 4.273038246786986e-06,
      "loss": 0.3527,
      "step": 2959
    },
    {
      "epoch": 0.2008,
      "grad_norm": 0.4354703426361084,
      "learning_rate": 4.269584857187942e-06,
      "loss": 0.3374,
      "step": 2960
    },
    {
      "epoch": 0.201,
      "grad_norm": 0.3981773853302002,
      "learning_rate": 4.266131823584368e-06,
      "loss": 0.3381,
      "step": 2961
    },
    {
      "epoch": 0.2012,
      "grad_norm": 0.4993211328983307,
      "learning_rate": 4.262679147659227e-06,
      "loss": 0.3545,
      "step": 2962
    },
    {
      "epoch": 0.2014,
      "grad_norm": 0.3764035403728485,
      "learning_rate": 4.259226831095311e-06,
      "loss": 0.3583,
      "step": 2963
    },
    {
      "epoch": 0.2016,
      "grad_norm": 0.45676276087760925,
      "learning_rate": 4.255774875575239e-06,
      "loss": 0.3358,
      "step": 2964
    },
    {
      "epoch": 0.2018,
      "grad_norm": 0.5900163650512695,
      "learning_rate": 4.2523232827814534e-06,
      "loss": 0.3427,
      "step": 2965
    },
    {
      "epoch": 0.202,
      "grad_norm": 0.4591497778892517,
      "learning_rate": 4.248872054396215e-06,
      "loss": 0.3514,
      "step": 2966
    },
    {
      "epoch": 0.2022,
      "grad_norm": 0.4723437428474426,
      "learning_rate": 4.245421192101613e-06,
      "loss": 0.3448,
      "step": 2967
    },
    {
      "epoch": 0.2024,
      "grad_norm": 0.374528706073761,
      "learning_rate": 4.241970697579557e-06,
      "loss": 0.3146,
      "step": 2968
    },
    {
      "epoch": 0.2026,
      "grad_norm": 0.49250397086143494,
      "learning_rate": 4.238520572511773e-06,
      "loss": 0.3501,
      "step": 2969
    },
    {
      "epoch": 0.2028,
      "grad_norm": 0.4478147625923157,
      "learning_rate": 4.23507081857981e-06,
      "loss": 0.3108,
      "step": 2970
    },
    {
      "epoch": 0.203,
      "grad_norm": 0.4485551714897156,
      "learning_rate": 4.23162143746504e-06,
      "loss": 0.322,
      "step": 2971
    },
    {
      "epoch": 0.2032,
      "grad_norm": 0.4826122522354126,
      "learning_rate": 4.228172430848645e-06,
      "loss": 0.3463,
      "step": 2972
    },
    {
      "epoch": 0.2034,
      "grad_norm": 0.5413246154785156,
      "learning_rate": 4.224723800411631e-06,
      "loss": 0.3817,
      "step": 2973
    },
    {
      "epoch": 0.2036,
      "grad_norm": 0.42090609669685364,
      "learning_rate": 4.22127554783482e-06,
      "loss": 0.3457,
      "step": 2974
    },
    {
      "epoch": 0.2038,
      "grad_norm": 0.40273910760879517,
      "learning_rate": 4.217827674798845e-06,
      "loss": 0.3578,
      "step": 2975
    },
    {
      "epoch": 0.204,
      "grad_norm": 0.8166390657424927,
      "learning_rate": 4.2143801829841635e-06,
      "loss": 0.3249,
      "step": 2976
    },
    {
      "epoch": 0.2042,
      "grad_norm": 0.4190605878829956,
      "learning_rate": 4.210933074071033e-06,
      "loss": 0.3567,
      "step": 2977
    },
    {
      "epoch": 0.2044,
      "grad_norm": 0.4394036829471588,
      "learning_rate": 4.207486349739538e-06,
      "loss": 0.3318,
      "step": 2978
    },
    {
      "epoch": 0.2046,
      "grad_norm": 0.48389115929603577,
      "learning_rate": 4.204040011669567e-06,
      "loss": 0.3165,
      "step": 2979
    },
    {
      "epoch": 0.2048,
      "grad_norm": 0.44781693816185,
      "learning_rate": 4.200594061540827e-06,
      "loss": 0.321,
      "step": 2980
    },
    {
      "epoch": 0.205,
      "grad_norm": 0.5868081450462341,
      "learning_rate": 4.197148501032829e-06,
      "loss": 0.3194,
      "step": 2981
    },
    {
      "epoch": 0.2052,
      "grad_norm": 0.4543694853782654,
      "learning_rate": 4.193703331824898e-06,
      "loss": 0.3539,
      "step": 2982
    },
    {
      "epoch": 0.2054,
      "grad_norm": 0.6463409662246704,
      "learning_rate": 4.190258555596168e-06,
      "loss": 0.3584,
      "step": 2983
    },
    {
      "epoch": 0.2056,
      "grad_norm": 0.4186566174030304,
      "learning_rate": 4.186814174025582e-06,
      "loss": 0.3409,
      "step": 2984
    },
    {
      "epoch": 0.2058,
      "grad_norm": 0.434049516916275,
      "learning_rate": 4.183370188791891e-06,
      "loss": 0.3353,
      "step": 2985
    },
    {
      "epoch": 0.206,
      "grad_norm": 0.46980658173561096,
      "learning_rate": 4.179926601573645e-06,
      "loss": 0.3472,
      "step": 2986
    },
    {
      "epoch": 0.2062,
      "grad_norm": 0.3779016137123108,
      "learning_rate": 4.176483414049214e-06,
      "loss": 0.3194,
      "step": 2987
    },
    {
      "epoch": 0.2064,
      "grad_norm": 0.468124657869339,
      "learning_rate": 4.173040627896762e-06,
      "loss": 0.3467,
      "step": 2988
    },
    {
      "epoch": 0.2066,
      "grad_norm": 0.4884736239910126,
      "learning_rate": 4.169598244794261e-06,
      "loss": 0.3563,
      "step": 2989
    },
    {
      "epoch": 0.2068,
      "grad_norm": 0.8022990226745605,
      "learning_rate": 4.166156266419489e-06,
      "loss": 0.3146,
      "step": 2990
    },
    {
      "epoch": 0.207,
      "grad_norm": 0.5119194984436035,
      "learning_rate": 4.162714694450023e-06,
      "loss": 0.3398,
      "step": 2991
    },
    {
      "epoch": 0.2072,
      "grad_norm": 0.5776007771492004,
      "learning_rate": 4.159273530563243e-06,
      "loss": 0.3043,
      "step": 2992
    },
    {
      "epoch": 0.2074,
      "grad_norm": 0.3831985294818878,
      "learning_rate": 4.155832776436331e-06,
      "loss": 0.3051,
      "step": 2993
    },
    {
      "epoch": 0.2076,
      "grad_norm": 0.42763441801071167,
      "learning_rate": 4.15239243374627e-06,
      "loss": 0.3333,
      "step": 2994
    },
    {
      "epoch": 0.2078,
      "grad_norm": 0.42698535323143005,
      "learning_rate": 4.148952504169839e-06,
      "loss": 0.3354,
      "step": 2995
    },
    {
      "epoch": 0.208,
      "grad_norm": 0.47986873984336853,
      "learning_rate": 4.145512989383618e-06,
      "loss": 0.3437,
      "step": 2996
    },
    {
      "epoch": 0.2082,
      "grad_norm": 0.49033844470977783,
      "learning_rate": 4.142073891063986e-06,
      "loss": 0.368,
      "step": 2997
    },
    {
      "epoch": 0.2084,
      "grad_norm": 0.3971177041530609,
      "learning_rate": 4.138635210887117e-06,
      "loss": 0.3528,
      "step": 2998
    },
    {
      "epoch": 0.2086,
      "grad_norm": 0.4466516971588135,
      "learning_rate": 4.135196950528982e-06,
      "loss": 0.3194,
      "step": 2999
    },
    {
      "epoch": 0.2088,
      "grad_norm": 0.41756919026374817,
      "learning_rate": 4.131759111665349e-06,
      "loss": 0.3535,
      "step": 3000
    },
    {
      "epoch": 0.209,
      "grad_norm": 0.9289345145225525,
      "learning_rate": 4.128321695971775e-06,
      "loss": 0.3398,
      "step": 3001
    },
    {
      "epoch": 0.2092,
      "grad_norm": 0.7155681252479553,
      "learning_rate": 4.124884705123619e-06,
      "loss": 0.3163,
      "step": 3002
    },
    {
      "epoch": 0.2094,
      "grad_norm": 0.45473021268844604,
      "learning_rate": 4.121448140796029e-06,
      "loss": 0.3422,
      "step": 3003
    },
    {
      "epoch": 0.2096,
      "grad_norm": 0.5468836426734924,
      "learning_rate": 4.118012004663939e-06,
      "loss": 0.3429,
      "step": 3004
    },
    {
      "epoch": 0.2098,
      "grad_norm": 0.47855040431022644,
      "learning_rate": 4.114576298402085e-06,
      "loss": 0.3597,
      "step": 3005
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.4378092586994171,
      "learning_rate": 4.111141023684986e-06,
      "loss": 0.3283,
      "step": 3006
    },
    {
      "epoch": 0.2102,
      "grad_norm": 0.4485653340816498,
      "learning_rate": 4.107706182186954e-06,
      "loss": 0.3295,
      "step": 3007
    },
    {
      "epoch": 0.2104,
      "grad_norm": 0.48433351516723633,
      "learning_rate": 4.104271775582089e-06,
      "loss": 0.3668,
      "step": 3008
    },
    {
      "epoch": 0.2106,
      "grad_norm": 0.49451014399528503,
      "learning_rate": 4.100837805544279e-06,
      "loss": 0.3256,
      "step": 3009
    },
    {
      "epoch": 0.2108,
      "grad_norm": 0.48371371626853943,
      "learning_rate": 4.0974042737472005e-06,
      "loss": 0.3281,
      "step": 3010
    },
    {
      "epoch": 0.211,
      "grad_norm": 0.719059944152832,
      "learning_rate": 4.093971181864313e-06,
      "loss": 0.3024,
      "step": 3011
    },
    {
      "epoch": 0.2112,
      "grad_norm": 3.798542022705078,
      "learning_rate": 4.090538531568867e-06,
      "loss": 0.3176,
      "step": 3012
    },
    {
      "epoch": 0.2114,
      "grad_norm": 0.4909060597419739,
      "learning_rate": 4.087106324533891e-06,
      "loss": 0.3673,
      "step": 3013
    },
    {
      "epoch": 0.2116,
      "grad_norm": 0.47896361351013184,
      "learning_rate": 4.083674562432203e-06,
      "loss": 0.3034,
      "step": 3014
    },
    {
      "epoch": 0.2118,
      "grad_norm": 0.3910931348800659,
      "learning_rate": 4.0802432469364e-06,
      "loss": 0.3123,
      "step": 3015
    },
    {
      "epoch": 0.212,
      "grad_norm": 0.47927331924438477,
      "learning_rate": 4.0768123797188665e-06,
      "loss": 0.3449,
      "step": 3016
    },
    {
      "epoch": 0.2122,
      "grad_norm": 0.48097285628318787,
      "learning_rate": 4.073381962451764e-06,
      "loss": 0.3504,
      "step": 3017
    },
    {
      "epoch": 0.2124,
      "grad_norm": 0.4757404327392578,
      "learning_rate": 4.069951996807034e-06,
      "loss": 0.3261,
      "step": 3018
    },
    {
      "epoch": 0.2126,
      "grad_norm": 0.5225086212158203,
      "learning_rate": 4.066522484456406e-06,
      "loss": 0.3716,
      "step": 3019
    },
    {
      "epoch": 0.2128,
      "grad_norm": 0.4476572275161743,
      "learning_rate": 4.063093427071376e-06,
      "loss": 0.3369,
      "step": 3020
    },
    {
      "epoch": 0.213,
      "grad_norm": 0.4421020448207855,
      "learning_rate": 4.0596648263232315e-06,
      "loss": 0.3318,
      "step": 3021
    },
    {
      "epoch": 0.2132,
      "grad_norm": 0.41264966130256653,
      "learning_rate": 4.0562366838830255e-06,
      "loss": 0.3394,
      "step": 3022
    },
    {
      "epoch": 0.2134,
      "grad_norm": 0.48652467131614685,
      "learning_rate": 4.052809001421595e-06,
      "loss": 0.3188,
      "step": 3023
    },
    {
      "epoch": 0.2136,
      "grad_norm": 0.45252251625061035,
      "learning_rate": 4.0493817806095504e-06,
      "loss": 0.3326,
      "step": 3024
    },
    {
      "epoch": 0.2138,
      "grad_norm": 0.5146551728248596,
      "learning_rate": 4.045955023117276e-06,
      "loss": 0.2962,
      "step": 3025
    },
    {
      "epoch": 0.214,
      "grad_norm": 0.4284785985946655,
      "learning_rate": 4.042528730614935e-06,
      "loss": 0.3581,
      "step": 3026
    },
    {
      "epoch": 0.2142,
      "grad_norm": 0.43466559052467346,
      "learning_rate": 4.039102904772459e-06,
      "loss": 0.3361,
      "step": 3027
    },
    {
      "epoch": 0.2144,
      "grad_norm": 0.42302098870277405,
      "learning_rate": 4.035677547259555e-06,
      "loss": 0.3224,
      "step": 3028
    },
    {
      "epoch": 0.2146,
      "grad_norm": 0.4682970345020294,
      "learning_rate": 4.032252659745699e-06,
      "loss": 0.3215,
      "step": 3029
    },
    {
      "epoch": 0.2148,
      "grad_norm": 0.43234124779701233,
      "learning_rate": 4.028828243900141e-06,
      "loss": 0.3447,
      "step": 3030
    },
    {
      "epoch": 0.215,
      "grad_norm": 0.41302117705345154,
      "learning_rate": 4.025404301391898e-06,
      "loss": 0.3292,
      "step": 3031
    },
    {
      "epoch": 0.2152,
      "grad_norm": 0.45752614736557007,
      "learning_rate": 4.02198083388976e-06,
      "loss": 0.3442,
      "step": 3032
    },
    {
      "epoch": 0.2154,
      "grad_norm": 0.38169583678245544,
      "learning_rate": 4.018557843062282e-06,
      "loss": 0.2739,
      "step": 3033
    },
    {
      "epoch": 0.2156,
      "grad_norm": 0.46262651681900024,
      "learning_rate": 4.015135330577787e-06,
      "loss": 0.3052,
      "step": 3034
    },
    {
      "epoch": 0.2158,
      "grad_norm": 0.4987923502922058,
      "learning_rate": 4.0117132981043695e-06,
      "loss": 0.3373,
      "step": 3035
    },
    {
      "epoch": 0.216,
      "grad_norm": 0.4647216200828552,
      "learning_rate": 4.0082917473098845e-06,
      "loss": 0.3331,
      "step": 3036
    },
    {
      "epoch": 0.2162,
      "grad_norm": 0.6268872618675232,
      "learning_rate": 4.004870679861953e-06,
      "loss": 0.351,
      "step": 3037
    },
    {
      "epoch": 0.2164,
      "grad_norm": 0.43709224462509155,
      "learning_rate": 4.001450097427965e-06,
      "loss": 0.3544,
      "step": 3038
    },
    {
      "epoch": 0.2166,
      "grad_norm": 0.6966906785964966,
      "learning_rate": 3.9980300016750696e-06,
      "loss": 0.3564,
      "step": 3039
    },
    {
      "epoch": 0.2168,
      "grad_norm": 0.5315409302711487,
      "learning_rate": 3.994610394270178e-06,
      "loss": 0.3448,
      "step": 3040
    },
    {
      "epoch": 0.217,
      "grad_norm": 0.4513690769672394,
      "learning_rate": 3.991191276879966e-06,
      "loss": 0.3408,
      "step": 3041
    },
    {
      "epoch": 0.2172,
      "grad_norm": 0.4286937117576599,
      "learning_rate": 3.987772651170871e-06,
      "loss": 0.3749,
      "step": 3042
    },
    {
      "epoch": 0.2174,
      "grad_norm": 0.4544420540332794,
      "learning_rate": 3.98435451880909e-06,
      "loss": 0.3178,
      "step": 3043
    },
    {
      "epoch": 0.2176,
      "grad_norm": 0.36105766892433167,
      "learning_rate": 3.980936881460576e-06,
      "loss": 0.2813,
      "step": 3044
    },
    {
      "epoch": 0.2178,
      "grad_norm": 0.47014519572257996,
      "learning_rate": 3.977519740791049e-06,
      "loss": 0.3505,
      "step": 3045
    },
    {
      "epoch": 0.218,
      "grad_norm": 0.37340298295021057,
      "learning_rate": 3.974103098465976e-06,
      "loss": 0.3312,
      "step": 3046
    },
    {
      "epoch": 0.2182,
      "grad_norm": 0.5512489080429077,
      "learning_rate": 3.970686956150595e-06,
      "loss": 0.3548,
      "step": 3047
    },
    {
      "epoch": 0.2184,
      "grad_norm": 0.4361092746257782,
      "learning_rate": 3.967271315509884e-06,
      "loss": 0.3339,
      "step": 3048
    },
    {
      "epoch": 0.2186,
      "grad_norm": 0.5778906345367432,
      "learning_rate": 3.963856178208588e-06,
      "loss": 0.3355,
      "step": 3049
    },
    {
      "epoch": 0.2188,
      "grad_norm": 0.9026251435279846,
      "learning_rate": 3.960441545911205e-06,
      "loss": 0.3349,
      "step": 3050
    },
    {
      "epoch": 0.219,
      "grad_norm": 0.5260124802589417,
      "learning_rate": 3.957027420281981e-06,
      "loss": 0.3642,
      "step": 3051
    },
    {
      "epoch": 0.2192,
      "grad_norm": 0.41645967960357666,
      "learning_rate": 3.9536138029849244e-06,
      "loss": 0.3171,
      "step": 3052
    },
    {
      "epoch": 0.2194,
      "grad_norm": 0.5083152055740356,
      "learning_rate": 3.950200695683788e-06,
      "loss": 0.3241,
      "step": 3053
    },
    {
      "epoch": 0.2196,
      "grad_norm": 0.44498082995414734,
      "learning_rate": 3.94678810004208e-06,
      "loss": 0.2925,
      "step": 3054
    },
    {
      "epoch": 0.2198,
      "grad_norm": 0.49861207604408264,
      "learning_rate": 3.943376017723058e-06,
      "loss": 0.3818,
      "step": 3055
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5556429028511047,
      "learning_rate": 3.939964450389728e-06,
      "loss": 0.3529,
      "step": 3056
    },
    {
      "epoch": 0.2202,
      "grad_norm": 0.42022597789764404,
      "learning_rate": 3.936553399704848e-06,
      "loss": 0.3375,
      "step": 3057
    },
    {
      "epoch": 0.2204,
      "grad_norm": 1.052761435508728,
      "learning_rate": 3.933142867330921e-06,
      "loss": 0.3566,
      "step": 3058
    },
    {
      "epoch": 0.2206,
      "grad_norm": 0.48119837045669556,
      "learning_rate": 3.9297328549302e-06,
      "loss": 0.3661,
      "step": 3059
    },
    {
      "epoch": 0.2208,
      "grad_norm": 0.9006600379943848,
      "learning_rate": 3.926323364164684e-06,
      "loss": 0.3405,
      "step": 3060
    },
    {
      "epoch": 0.221,
      "grad_norm": 0.5023663640022278,
      "learning_rate": 3.922914396696118e-06,
      "loss": 0.3547,
      "step": 3061
    },
    {
      "epoch": 0.2212,
      "grad_norm": 0.4543505311012268,
      "learning_rate": 3.91950595418599e-06,
      "loss": 0.3464,
      "step": 3062
    },
    {
      "epoch": 0.2214,
      "grad_norm": 0.5495287179946899,
      "learning_rate": 3.9160980382955336e-06,
      "loss": 0.336,
      "step": 3063
    },
    {
      "epoch": 0.2216,
      "grad_norm": 0.40527576208114624,
      "learning_rate": 3.912690650685726e-06,
      "loss": 0.3208,
      "step": 3064
    },
    {
      "epoch": 0.2218,
      "grad_norm": 0.47838422656059265,
      "learning_rate": 3.909283793017289e-06,
      "loss": 0.3217,
      "step": 3065
    },
    {
      "epoch": 0.222,
      "grad_norm": 1.1253273487091064,
      "learning_rate": 3.905877466950679e-06,
      "loss": 0.3331,
      "step": 3066
    },
    {
      "epoch": 0.2222,
      "grad_norm": 0.3983668386936188,
      "learning_rate": 3.902471674146099e-06,
      "loss": 0.3216,
      "step": 3067
    },
    {
      "epoch": 0.2224,
      "grad_norm": 0.5019698739051819,
      "learning_rate": 3.899066416263493e-06,
      "loss": 0.3458,
      "step": 3068
    },
    {
      "epoch": 0.2226,
      "grad_norm": 0.8546066880226135,
      "learning_rate": 3.895661694962542e-06,
      "loss": 0.3795,
      "step": 3069
    },
    {
      "epoch": 0.2228,
      "grad_norm": 0.44883719086647034,
      "learning_rate": 3.892257511902664e-06,
      "loss": 0.3368,
      "step": 3070
    },
    {
      "epoch": 0.223,
      "grad_norm": 0.5104123950004578,
      "learning_rate": 3.888853868743018e-06,
      "loss": 0.3122,
      "step": 3071
    },
    {
      "epoch": 0.2232,
      "grad_norm": 0.49295955896377563,
      "learning_rate": 3.885450767142498e-06,
      "loss": 0.3123,
      "step": 3072
    },
    {
      "epoch": 0.2234,
      "grad_norm": 0.3884437084197998,
      "learning_rate": 3.882048208759735e-06,
      "loss": 0.3198,
      "step": 3073
    },
    {
      "epoch": 0.2236,
      "grad_norm": 0.4574180543422699,
      "learning_rate": 3.8786461952530955e-06,
      "loss": 0.3293,
      "step": 3074
    },
    {
      "epoch": 0.2238,
      "grad_norm": 0.4331417381763458,
      "learning_rate": 3.875244728280676e-06,
      "loss": 0.3271,
      "step": 3075
    },
    {
      "epoch": 0.224,
      "grad_norm": 0.4602554738521576,
      "learning_rate": 3.871843809500313e-06,
      "loss": 0.3307,
      "step": 3076
    },
    {
      "epoch": 0.2242,
      "grad_norm": 0.48258471488952637,
      "learning_rate": 3.868443440569571e-06,
      "loss": 0.325,
      "step": 3077
    },
    {
      "epoch": 0.2244,
      "grad_norm": 0.40558427572250366,
      "learning_rate": 3.865043623145751e-06,
      "loss": 0.3189,
      "step": 3078
    },
    {
      "epoch": 0.2246,
      "grad_norm": 0.49175888299942017,
      "learning_rate": 3.86164435888588e-06,
      "loss": 0.3398,
      "step": 3079
    },
    {
      "epoch": 0.2248,
      "grad_norm": 0.5131886005401611,
      "learning_rate": 3.8582456494467214e-06,
      "loss": 0.3282,
      "step": 3080
    },
    {
      "epoch": 0.225,
      "grad_norm": 0.6682493090629578,
      "learning_rate": 3.854847496484762e-06,
      "loss": 0.3269,
      "step": 3081
    },
    {
      "epoch": 0.2252,
      "grad_norm": 0.41101905703544617,
      "learning_rate": 3.8514499016562216e-06,
      "loss": 0.2924,
      "step": 3082
    },
    {
      "epoch": 0.2254,
      "grad_norm": 0.3901878297328949,
      "learning_rate": 3.8480528666170495e-06,
      "loss": 0.3245,
      "step": 3083
    },
    {
      "epoch": 0.2256,
      "grad_norm": 0.46946242451667786,
      "learning_rate": 3.844656393022912e-06,
      "loss": 0.3433,
      "step": 3084
    },
    {
      "epoch": 0.2258,
      "grad_norm": 0.4512695372104645,
      "learning_rate": 3.841260482529215e-06,
      "loss": 0.3503,
      "step": 3085
    },
    {
      "epoch": 0.226,
      "grad_norm": 0.465350478887558,
      "learning_rate": 3.83786513679108e-06,
      "loss": 0.3454,
      "step": 3086
    },
    {
      "epoch": 0.2262,
      "grad_norm": 0.4489268362522125,
      "learning_rate": 3.834470357463362e-06,
      "loss": 0.3553,
      "step": 3087
    },
    {
      "epoch": 0.2264,
      "grad_norm": 0.5663504600524902,
      "learning_rate": 3.831076146200633e-06,
      "loss": 0.3334,
      "step": 3088
    },
    {
      "epoch": 0.2266,
      "grad_norm": 0.4446065425872803,
      "learning_rate": 3.827682504657187e-06,
      "loss": 0.3475,
      "step": 3089
    },
    {
      "epoch": 0.2268,
      "grad_norm": 0.479457288980484,
      "learning_rate": 3.82428943448705e-06,
      "loss": 0.3405,
      "step": 3090
    },
    {
      "epoch": 0.227,
      "grad_norm": 0.45597097277641296,
      "learning_rate": 3.820896937343959e-06,
      "loss": 0.3614,
      "step": 3091
    },
    {
      "epoch": 0.2272,
      "grad_norm": 0.6311570405960083,
      "learning_rate": 3.817505014881378e-06,
      "loss": 0.3355,
      "step": 3092
    },
    {
      "epoch": 0.2274,
      "grad_norm": 0.4560350775718689,
      "learning_rate": 3.814113668752486e-06,
      "loss": 0.3159,
      "step": 3093
    },
    {
      "epoch": 0.2276,
      "grad_norm": 0.41623812913894653,
      "learning_rate": 3.810722900610186e-06,
      "loss": 0.3394,
      "step": 3094
    },
    {
      "epoch": 0.2278,
      "grad_norm": 0.5184330344200134,
      "learning_rate": 3.8073327121070968e-06,
      "loss": 0.3759,
      "step": 3095
    },
    {
      "epoch": 0.228,
      "grad_norm": 0.45617496967315674,
      "learning_rate": 3.8039431048955537e-06,
      "loss": 0.3107,
      "step": 3096
    },
    {
      "epoch": 0.2282,
      "grad_norm": 0.47178298234939575,
      "learning_rate": 3.8005540806276132e-06,
      "loss": 0.3518,
      "step": 3097
    },
    {
      "epoch": 0.2284,
      "grad_norm": 0.5564813613891602,
      "learning_rate": 3.797165640955041e-06,
      "loss": 0.3491,
      "step": 3098
    },
    {
      "epoch": 0.2286,
      "grad_norm": 0.4497515857219696,
      "learning_rate": 3.793777787529325e-06,
      "loss": 0.3028,
      "step": 3099
    },
    {
      "epoch": 0.2288,
      "grad_norm": 0.49538421630859375,
      "learning_rate": 3.790390522001662e-06,
      "loss": 0.348,
      "step": 3100
    },
    {
      "epoch": 0.229,
      "grad_norm": 0.407399982213974,
      "learning_rate": 3.787003846022964e-06,
      "loss": 0.3485,
      "step": 3101
    },
    {
      "epoch": 0.2292,
      "grad_norm": 0.43696653842926025,
      "learning_rate": 3.7836177612438557e-06,
      "loss": 0.3156,
      "step": 3102
    },
    {
      "epoch": 0.2294,
      "grad_norm": 0.43811067938804626,
      "learning_rate": 3.7802322693146726e-06,
      "loss": 0.3562,
      "step": 3103
    },
    {
      "epoch": 0.2296,
      "grad_norm": 0.8258863687515259,
      "learning_rate": 3.776847371885464e-06,
      "loss": 0.3256,
      "step": 3104
    },
    {
      "epoch": 0.2298,
      "grad_norm": 0.5058077573776245,
      "learning_rate": 3.7734630706059873e-06,
      "loss": 0.3583,
      "step": 3105
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.49526864290237427,
      "learning_rate": 3.77007936712571e-06,
      "loss": 0.3603,
      "step": 3106
    },
    {
      "epoch": 0.2302,
      "grad_norm": 0.4949630796909332,
      "learning_rate": 3.7666962630938084e-06,
      "loss": 0.3304,
      "step": 3107
    },
    {
      "epoch": 0.2304,
      "grad_norm": 0.4383028745651245,
      "learning_rate": 3.7633137601591647e-06,
      "loss": 0.3331,
      "step": 3108
    },
    {
      "epoch": 0.2306,
      "grad_norm": 0.4249805510044098,
      "learning_rate": 3.759931859970374e-06,
      "loss": 0.3455,
      "step": 3109
    },
    {
      "epoch": 0.2308,
      "grad_norm": 0.5119044780731201,
      "learning_rate": 3.756550564175727e-06,
      "loss": 0.3419,
      "step": 3110
    },
    {
      "epoch": 0.231,
      "grad_norm": 0.41380709409713745,
      "learning_rate": 3.7531698744232307e-06,
      "loss": 0.3225,
      "step": 3111
    },
    {
      "epoch": 0.2312,
      "grad_norm": 0.5212100148200989,
      "learning_rate": 3.74978979236059e-06,
      "loss": 0.3212,
      "step": 3112
    },
    {
      "epoch": 0.2314,
      "grad_norm": 0.42713963985443115,
      "learning_rate": 3.7464103196352176e-06,
      "loss": 0.3033,
      "step": 3113
    },
    {
      "epoch": 0.2316,
      "grad_norm": 0.567656934261322,
      "learning_rate": 3.7430314578942263e-06,
      "loss": 0.3434,
      "step": 3114
    },
    {
      "epoch": 0.2318,
      "grad_norm": 0.44234275817871094,
      "learning_rate": 3.7396532087844318e-06,
      "loss": 0.3365,
      "step": 3115
    },
    {
      "epoch": 0.232,
      "grad_norm": 0.4394865036010742,
      "learning_rate": 3.736275573952354e-06,
      "loss": 0.3247,
      "step": 3116
    },
    {
      "epoch": 0.2322,
      "grad_norm": 0.42738214135169983,
      "learning_rate": 3.7328985550442086e-06,
      "loss": 0.3368,
      "step": 3117
    },
    {
      "epoch": 0.2324,
      "grad_norm": 0.4162706732749939,
      "learning_rate": 3.7295221537059162e-06,
      "loss": 0.3328,
      "step": 3118
    },
    {
      "epoch": 0.2326,
      "grad_norm": 0.48458680510520935,
      "learning_rate": 3.7261463715830902e-06,
      "loss": 0.3262,
      "step": 3119
    },
    {
      "epoch": 0.2328,
      "grad_norm": 0.46137234568595886,
      "learning_rate": 3.7227712103210485e-06,
      "loss": 0.3465,
      "step": 3120
    },
    {
      "epoch": 0.233,
      "grad_norm": 0.5204905271530151,
      "learning_rate": 3.7193966715648026e-06,
      "loss": 0.3442,
      "step": 3121
    },
    {
      "epoch": 0.2332,
      "grad_norm": 0.45307910442352295,
      "learning_rate": 3.716022756959061e-06,
      "loss": 0.3289,
      "step": 3122
    },
    {
      "epoch": 0.2334,
      "grad_norm": 0.4900096654891968,
      "learning_rate": 3.7126494681482317e-06,
      "loss": 0.3214,
      "step": 3123
    },
    {
      "epoch": 0.2336,
      "grad_norm": 0.4860491156578064,
      "learning_rate": 3.709276806776412e-06,
      "loss": 0.3488,
      "step": 3124
    },
    {
      "epoch": 0.2338,
      "grad_norm": 0.5519530773162842,
      "learning_rate": 3.705904774487396e-06,
      "loss": 0.365,
      "step": 3125
    },
    {
      "epoch": 0.234,
      "grad_norm": 0.4081253707408905,
      "learning_rate": 3.7025333729246733e-06,
      "loss": 0.3419,
      "step": 3126
    },
    {
      "epoch": 0.2342,
      "grad_norm": 0.5223135948181152,
      "learning_rate": 3.699162603731423e-06,
      "loss": 0.3625,
      "step": 3127
    },
    {
      "epoch": 0.2344,
      "grad_norm": 0.8162585496902466,
      "learning_rate": 3.695792468550517e-06,
      "loss": 0.3504,
      "step": 3128
    },
    {
      "epoch": 0.2346,
      "grad_norm": 0.4509448707103729,
      "learning_rate": 3.6924229690245163e-06,
      "loss": 0.3444,
      "step": 3129
    },
    {
      "epoch": 0.2348,
      "grad_norm": 0.5873705744743347,
      "learning_rate": 3.6890541067956775e-06,
      "loss": 0.3366,
      "step": 3130
    },
    {
      "epoch": 0.235,
      "grad_norm": 0.4147254526615143,
      "learning_rate": 3.68568588350594e-06,
      "loss": 0.3253,
      "step": 3131
    },
    {
      "epoch": 0.2352,
      "grad_norm": 0.4585094749927521,
      "learning_rate": 3.6823183007969375e-06,
      "loss": 0.341,
      "step": 3132
    },
    {
      "epoch": 0.2354,
      "grad_norm": 0.4281623959541321,
      "learning_rate": 3.678951360309988e-06,
      "loss": 0.3477,
      "step": 3133
    },
    {
      "epoch": 0.2356,
      "grad_norm": 0.6129847764968872,
      "learning_rate": 3.6755850636860956e-06,
      "loss": 0.3367,
      "step": 3134
    },
    {
      "epoch": 0.2358,
      "grad_norm": 0.4039444923400879,
      "learning_rate": 3.672219412565956e-06,
      "loss": 0.3236,
      "step": 3135
    },
    {
      "epoch": 0.236,
      "grad_norm": 0.4613497257232666,
      "learning_rate": 3.668854408589945e-06,
      "loss": 0.3126,
      "step": 3136
    },
    {
      "epoch": 0.2362,
      "grad_norm": 0.4345828890800476,
      "learning_rate": 3.6654900533981234e-06,
      "loss": 0.3142,
      "step": 3137
    },
    {
      "epoch": 0.2364,
      "grad_norm": 0.43299978971481323,
      "learning_rate": 3.6621263486302373e-06,
      "loss": 0.3173,
      "step": 3138
    },
    {
      "epoch": 0.2366,
      "grad_norm": 0.4948212504386902,
      "learning_rate": 3.6587632959257168e-06,
      "loss": 0.329,
      "step": 3139
    },
    {
      "epoch": 0.2368,
      "grad_norm": 0.6538234949111938,
      "learning_rate": 3.655400896923672e-06,
      "loss": 0.3382,
      "step": 3140
    },
    {
      "epoch": 0.237,
      "grad_norm": 0.43387308716773987,
      "learning_rate": 3.6520391532628953e-06,
      "loss": 0.2622,
      "step": 3141
    },
    {
      "epoch": 0.2372,
      "grad_norm": 0.6361921429634094,
      "learning_rate": 3.648678066581861e-06,
      "loss": 0.362,
      "step": 3142
    },
    {
      "epoch": 0.2374,
      "grad_norm": 0.48882192373275757,
      "learning_rate": 3.645317638518721e-06,
      "loss": 0.302,
      "step": 3143
    },
    {
      "epoch": 0.2376,
      "grad_norm": 0.3658190369606018,
      "learning_rate": 3.6419578707113055e-06,
      "loss": 0.3488,
      "step": 3144
    },
    {
      "epoch": 0.2378,
      "grad_norm": 0.4246411621570587,
      "learning_rate": 3.6385987647971287e-06,
      "loss": 0.3374,
      "step": 3145
    },
    {
      "epoch": 0.238,
      "grad_norm": 0.46895426511764526,
      "learning_rate": 3.635240322413375e-06,
      "loss": 0.356,
      "step": 3146
    },
    {
      "epoch": 0.2382,
      "grad_norm": 0.4753226041793823,
      "learning_rate": 3.6318825451969085e-06,
      "loss": 0.328,
      "step": 3147
    },
    {
      "epoch": 0.2384,
      "grad_norm": 0.4594697654247284,
      "learning_rate": 3.628525434784268e-06,
      "loss": 0.3265,
      "step": 3148
    },
    {
      "epoch": 0.2386,
      "grad_norm": 0.44035303592681885,
      "learning_rate": 3.625168992811671e-06,
      "loss": 0.3232,
      "step": 3149
    },
    {
      "epoch": 0.2388,
      "grad_norm": 0.47867465019226074,
      "learning_rate": 3.6218132209150047e-06,
      "loss": 0.36,
      "step": 3150
    },
    {
      "epoch": 0.239,
      "grad_norm": 0.4613383114337921,
      "learning_rate": 3.618458120729832e-06,
      "loss": 0.3683,
      "step": 3151
    },
    {
      "epoch": 0.2392,
      "grad_norm": 0.39620545506477356,
      "learning_rate": 3.6151036938913887e-06,
      "loss": 0.3417,
      "step": 3152
    },
    {
      "epoch": 0.2394,
      "grad_norm": 0.4843883216381073,
      "learning_rate": 3.61174994203458e-06,
      "loss": 0.3302,
      "step": 3153
    },
    {
      "epoch": 0.2396,
      "grad_norm": 0.6936995387077332,
      "learning_rate": 3.608396866793988e-06,
      "loss": 0.3423,
      "step": 3154
    },
    {
      "epoch": 0.2398,
      "grad_norm": 0.5248640179634094,
      "learning_rate": 3.6050444698038547e-06,
      "loss": 0.3535,
      "step": 3155
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.47451260685920715,
      "learning_rate": 3.6016927526981014e-06,
      "loss": 0.3836,
      "step": 3156
    },
    {
      "epoch": 0.2402,
      "grad_norm": 0.4643985629081726,
      "learning_rate": 3.598341717110313e-06,
      "loss": 0.3197,
      "step": 3157
    },
    {
      "epoch": 0.2404,
      "grad_norm": 0.5552570819854736,
      "learning_rate": 3.5949913646737456e-06,
      "loss": 0.3323,
      "step": 3158
    },
    {
      "epoch": 0.2406,
      "grad_norm": 0.47465234994888306,
      "learning_rate": 3.5916416970213173e-06,
      "loss": 0.3186,
      "step": 3159
    },
    {
      "epoch": 0.2408,
      "grad_norm": 0.43572142720222473,
      "learning_rate": 3.5882927157856175e-06,
      "loss": 0.3525,
      "step": 3160
    },
    {
      "epoch": 0.241,
      "grad_norm": 0.46447405219078064,
      "learning_rate": 3.584944422598899e-06,
      "loss": 0.3328,
      "step": 3161
    },
    {
      "epoch": 0.2412,
      "grad_norm": 0.55682772397995,
      "learning_rate": 3.5815968190930793e-06,
      "loss": 0.3395,
      "step": 3162
    },
    {
      "epoch": 0.2414,
      "grad_norm": 0.4282892942428589,
      "learning_rate": 3.5782499068997386e-06,
      "loss": 0.3089,
      "step": 3163
    },
    {
      "epoch": 0.2416,
      "grad_norm": 0.4438365399837494,
      "learning_rate": 3.5749036876501196e-06,
      "loss": 0.338,
      "step": 3164
    },
    {
      "epoch": 0.2418,
      "grad_norm": 0.6473256349563599,
      "learning_rate": 3.571558162975133e-06,
      "loss": 0.3362,
      "step": 3165
    },
    {
      "epoch": 0.242,
      "grad_norm": 0.43566831946372986,
      "learning_rate": 3.568213334505345e-06,
      "loss": 0.3002,
      "step": 3166
    },
    {
      "epoch": 0.2422,
      "grad_norm": 0.4925442039966583,
      "learning_rate": 3.564869203870982e-06,
      "loss": 0.3035,
      "step": 3167
    },
    {
      "epoch": 0.2424,
      "grad_norm": 0.4525102376937866,
      "learning_rate": 3.561525772701937e-06,
      "loss": 0.3423,
      "step": 3168
    },
    {
      "epoch": 0.2426,
      "grad_norm": 0.3881657123565674,
      "learning_rate": 3.5581830426277554e-06,
      "loss": 0.3005,
      "step": 3169
    },
    {
      "epoch": 0.2428,
      "grad_norm": 0.7047293782234192,
      "learning_rate": 3.5548410152776414e-06,
      "loss": 0.328,
      "step": 3170
    },
    {
      "epoch": 0.243,
      "grad_norm": 0.463875949382782,
      "learning_rate": 3.5514996922804636e-06,
      "loss": 0.3291,
      "step": 3171
    },
    {
      "epoch": 0.2432,
      "grad_norm": 0.4240015745162964,
      "learning_rate": 3.548159075264738e-06,
      "loss": 0.3406,
      "step": 3172
    },
    {
      "epoch": 0.2434,
      "grad_norm": 0.4745037257671356,
      "learning_rate": 3.5448191658586423e-06,
      "loss": 0.3499,
      "step": 3173
    },
    {
      "epoch": 0.2436,
      "grad_norm": 0.41553595662117004,
      "learning_rate": 3.5414799656900057e-06,
      "loss": 0.3438,
      "step": 3174
    },
    {
      "epoch": 0.2438,
      "grad_norm": 0.46560925245285034,
      "learning_rate": 3.538141476386317e-06,
      "loss": 0.3314,
      "step": 3175
    },
    {
      "epoch": 0.244,
      "grad_norm": 0.47946834564208984,
      "learning_rate": 3.5348036995747135e-06,
      "loss": 0.3634,
      "step": 3176
    },
    {
      "epoch": 0.2442,
      "grad_norm": 0.45396819710731506,
      "learning_rate": 3.531466636881987e-06,
      "loss": 0.3217,
      "step": 3177
    },
    {
      "epoch": 0.2444,
      "grad_norm": 0.4372910261154175,
      "learning_rate": 3.5281302899345825e-06,
      "loss": 0.3316,
      "step": 3178
    },
    {
      "epoch": 0.2446,
      "grad_norm": 0.43958958983421326,
      "learning_rate": 3.524794660358593e-06,
      "loss": 0.3645,
      "step": 3179
    },
    {
      "epoch": 0.2448,
      "grad_norm": 0.5460183620452881,
      "learning_rate": 3.521459749779769e-06,
      "loss": 0.3501,
      "step": 3180
    },
    {
      "epoch": 0.245,
      "grad_norm": 0.4080565273761749,
      "learning_rate": 3.5181255598234963e-06,
      "loss": 0.3115,
      "step": 3181
    },
    {
      "epoch": 0.2452,
      "grad_norm": 0.42424243688583374,
      "learning_rate": 3.5147920921148267e-06,
      "loss": 0.3697,
      "step": 3182
    },
    {
      "epoch": 0.2454,
      "grad_norm": 0.6613327264785767,
      "learning_rate": 3.511459348278448e-06,
      "loss": 0.304,
      "step": 3183
    },
    {
      "epoch": 0.2456,
      "grad_norm": 0.4815514087677002,
      "learning_rate": 3.508127329938699e-06,
      "loss": 0.3493,
      "step": 3184
    },
    {
      "epoch": 0.2458,
      "grad_norm": 0.518894374370575,
      "learning_rate": 3.5047960387195673e-06,
      "loss": 0.3352,
      "step": 3185
    },
    {
      "epoch": 0.246,
      "grad_norm": 0.42936673760414124,
      "learning_rate": 3.501465476244681e-06,
      "loss": 0.3661,
      "step": 3186
    },
    {
      "epoch": 0.2462,
      "grad_norm": 0.5295752882957458,
      "learning_rate": 3.498135644137318e-06,
      "loss": 0.3404,
      "step": 3187
    },
    {
      "epoch": 0.2464,
      "grad_norm": 0.5287970304489136,
      "learning_rate": 3.4948065440203982e-06,
      "loss": 0.3311,
      "step": 3188
    },
    {
      "epoch": 0.2466,
      "grad_norm": 0.4152123034000397,
      "learning_rate": 3.491478177516484e-06,
      "loss": 0.3356,
      "step": 3189
    },
    {
      "epoch": 0.2468,
      "grad_norm": 0.43020713329315186,
      "learning_rate": 3.488150546247778e-06,
      "loss": 0.3372,
      "step": 3190
    },
    {
      "epoch": 0.247,
      "grad_norm": 0.6143569350242615,
      "learning_rate": 3.484823651836131e-06,
      "loss": 0.3676,
      "step": 3191
    },
    {
      "epoch": 0.2472,
      "grad_norm": 0.49024662375450134,
      "learning_rate": 3.4814974959030294e-06,
      "loss": 0.3274,
      "step": 3192
    },
    {
      "epoch": 0.2474,
      "grad_norm": 0.4304406940937042,
      "learning_rate": 3.4781720800696006e-06,
      "loss": 0.3353,
      "step": 3193
    },
    {
      "epoch": 0.2476,
      "grad_norm": 0.6003841757774353,
      "learning_rate": 3.474847405956613e-06,
      "loss": 0.3326,
      "step": 3194
    },
    {
      "epoch": 0.2478,
      "grad_norm": 0.664505660533905,
      "learning_rate": 3.471523475184472e-06,
      "loss": 0.3326,
      "step": 3195
    },
    {
      "epoch": 0.248,
      "grad_norm": 0.423743337392807,
      "learning_rate": 3.4682002893732203e-06,
      "loss": 0.3331,
      "step": 3196
    },
    {
      "epoch": 0.2482,
      "grad_norm": 0.5209110379219055,
      "learning_rate": 3.464877850142541e-06,
      "loss": 0.3104,
      "step": 3197
    },
    {
      "epoch": 0.2484,
      "grad_norm": 0.6301365494728088,
      "learning_rate": 3.4615561591117486e-06,
      "loss": 0.352,
      "step": 3198
    },
    {
      "epoch": 0.2486,
      "grad_norm": 0.45551207661628723,
      "learning_rate": 3.4582352178997937e-06,
      "loss": 0.3504,
      "step": 3199
    },
    {
      "epoch": 0.2488,
      "grad_norm": 0.41838324069976807,
      "learning_rate": 3.4549150281252635e-06,
      "loss": 0.319,
      "step": 3200
    },
    {
      "epoch": 0.249,
      "grad_norm": 0.645295262336731,
      "learning_rate": 3.4515955914063796e-06,
      "loss": 0.298,
      "step": 3201
    },
    {
      "epoch": 0.2492,
      "grad_norm": 0.40125057101249695,
      "learning_rate": 3.4482769093609945e-06,
      "loss": 0.3067,
      "step": 3202
    },
    {
      "epoch": 0.2494,
      "grad_norm": 0.4765092730522156,
      "learning_rate": 3.444958983606592e-06,
      "loss": 0.317,
      "step": 3203
    },
    {
      "epoch": 0.2496,
      "grad_norm": 0.4690178334712982,
      "learning_rate": 3.441641815760291e-06,
      "loss": 0.3567,
      "step": 3204
    },
    {
      "epoch": 0.2498,
      "grad_norm": 0.4364680051803589,
      "learning_rate": 3.4383254074388373e-06,
      "loss": 0.3152,
      "step": 3205
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.513806939125061,
      "learning_rate": 3.4350097602586085e-06,
      "loss": 0.3439,
      "step": 3206
    },
    {
      "epoch": 0.2502,
      "grad_norm": 0.6901730895042419,
      "learning_rate": 3.4316948758356127e-06,
      "loss": 0.2981,
      "step": 3207
    },
    {
      "epoch": 0.2504,
      "grad_norm": 0.4385497272014618,
      "learning_rate": 3.4283807557854814e-06,
      "loss": 0.3569,
      "step": 3208
    },
    {
      "epoch": 0.2506,
      "grad_norm": 0.5129398107528687,
      "learning_rate": 3.4250674017234774e-06,
      "loss": 0.359,
      "step": 3209
    },
    {
      "epoch": 0.2508,
      "grad_norm": 0.4663659632205963,
      "learning_rate": 3.4217548152644887e-06,
      "loss": 0.3098,
      "step": 3210
    },
    {
      "epoch": 0.251,
      "grad_norm": 0.5066407322883606,
      "learning_rate": 3.4184429980230305e-06,
      "loss": 0.2863,
      "step": 3211
    },
    {
      "epoch": 0.2512,
      "grad_norm": 0.44769981503486633,
      "learning_rate": 3.4151319516132414e-06,
      "loss": 0.3597,
      "step": 3212
    },
    {
      "epoch": 0.2514,
      "grad_norm": 0.4383269250392914,
      "learning_rate": 3.411821677648887e-06,
      "loss": 0.3541,
      "step": 3213
    },
    {
      "epoch": 0.2516,
      "grad_norm": 0.589823842048645,
      "learning_rate": 3.4085121777433532e-06,
      "loss": 0.3403,
      "step": 3214
    },
    {
      "epoch": 0.2518,
      "grad_norm": 0.466137558221817,
      "learning_rate": 3.40520345350965e-06,
      "loss": 0.3384,
      "step": 3215
    },
    {
      "epoch": 0.252,
      "grad_norm": 0.4169074296951294,
      "learning_rate": 3.401895506560411e-06,
      "loss": 0.3435,
      "step": 3216
    },
    {
      "epoch": 0.2522,
      "grad_norm": 0.3819303810596466,
      "learning_rate": 3.3985883385078875e-06,
      "loss": 0.3215,
      "step": 3217
    },
    {
      "epoch": 0.2524,
      "grad_norm": 0.639476478099823,
      "learning_rate": 3.3952819509639534e-06,
      "loss": 0.3511,
      "step": 3218
    },
    {
      "epoch": 0.2526,
      "grad_norm": 0.5598458647727966,
      "learning_rate": 3.3919763455401016e-06,
      "loss": 0.325,
      "step": 3219
    },
    {
      "epoch": 0.2528,
      "grad_norm": 0.5305518507957458,
      "learning_rate": 3.3886715238474454e-06,
      "loss": 0.3681,
      "step": 3220
    },
    {
      "epoch": 0.253,
      "grad_norm": 0.4249856770038605,
      "learning_rate": 3.3853674874967134e-06,
      "loss": 0.3118,
      "step": 3221
    },
    {
      "epoch": 0.2532,
      "grad_norm": 0.5232000946998596,
      "learning_rate": 3.3820642380982527e-06,
      "loss": 0.3361,
      "step": 3222
    },
    {
      "epoch": 0.2534,
      "grad_norm": 0.4586060345172882,
      "learning_rate": 3.378761777262028e-06,
      "loss": 0.3415,
      "step": 3223
    },
    {
      "epoch": 0.2536,
      "grad_norm": 0.5578935742378235,
      "learning_rate": 3.375460106597619e-06,
      "loss": 0.3357,
      "step": 3224
    },
    {
      "epoch": 0.2538,
      "grad_norm": 0.8873924612998962,
      "learning_rate": 3.372159227714218e-06,
      "loss": 0.3293,
      "step": 3225
    },
    {
      "epoch": 0.254,
      "grad_norm": 0.42735061049461365,
      "learning_rate": 3.3688591422206333e-06,
      "loss": 0.3198,
      "step": 3226
    },
    {
      "epoch": 0.2542,
      "grad_norm": 0.4181356132030487,
      "learning_rate": 3.3655598517252886e-06,
      "loss": 0.3163,
      "step": 3227
    },
    {
      "epoch": 0.2544,
      "grad_norm": 0.4422154724597931,
      "learning_rate": 3.3622613578362162e-06,
      "loss": 0.3101,
      "step": 3228
    },
    {
      "epoch": 0.2546,
      "grad_norm": 0.5334792733192444,
      "learning_rate": 3.358963662161062e-06,
      "loss": 0.3002,
      "step": 3229
    },
    {
      "epoch": 0.2548,
      "grad_norm": 0.5126291513442993,
      "learning_rate": 3.355666766307084e-06,
      "loss": 0.3379,
      "step": 3230
    },
    {
      "epoch": 0.255,
      "grad_norm": 0.39795738458633423,
      "learning_rate": 3.352370671881148e-06,
      "loss": 0.2903,
      "step": 3231
    },
    {
      "epoch": 0.2552,
      "grad_norm": 0.42319774627685547,
      "learning_rate": 3.3490753804897315e-06,
      "loss": 0.3249,
      "step": 3232
    },
    {
      "epoch": 0.2554,
      "grad_norm": 0.4629117548465729,
      "learning_rate": 3.34578089373892e-06,
      "loss": 0.3061,
      "step": 3233
    },
    {
      "epoch": 0.2556,
      "grad_norm": 0.49471503496170044,
      "learning_rate": 3.3424872132344044e-06,
      "loss": 0.3167,
      "step": 3234
    },
    {
      "epoch": 0.2558,
      "grad_norm": 0.48614123463630676,
      "learning_rate": 3.339194340581485e-06,
      "loss": 0.3585,
      "step": 3235
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.4607694745063782,
      "learning_rate": 3.3359022773850673e-06,
      "loss": 0.3659,
      "step": 3236
    },
    {
      "epoch": 0.2562,
      "grad_norm": 0.4480232298374176,
      "learning_rate": 3.3326110252496652e-06,
      "loss": 0.3438,
      "step": 3237
    },
    {
      "epoch": 0.2564,
      "grad_norm": 0.4444412887096405,
      "learning_rate": 3.3293205857793924e-06,
      "loss": 0.3344,
      "step": 3238
    },
    {
      "epoch": 0.2566,
      "grad_norm": 0.5199095010757446,
      "learning_rate": 3.3260309605779717e-06,
      "loss": 0.3416,
      "step": 3239
    },
    {
      "epoch": 0.2568,
      "grad_norm": 0.3740561008453369,
      "learning_rate": 3.322742151248726e-06,
      "loss": 0.3333,
      "step": 3240
    },
    {
      "epoch": 0.257,
      "grad_norm": 0.42845597863197327,
      "learning_rate": 3.319454159394578e-06,
      "loss": 0.3435,
      "step": 3241
    },
    {
      "epoch": 0.2572,
      "grad_norm": 0.5481618046760559,
      "learning_rate": 3.31616698661806e-06,
      "loss": 0.3504,
      "step": 3242
    },
    {
      "epoch": 0.2574,
      "grad_norm": 0.46177321672439575,
      "learning_rate": 3.312880634521295e-06,
      "loss": 0.33,
      "step": 3243
    },
    {
      "epoch": 0.2576,
      "grad_norm": 0.43108803033828735,
      "learning_rate": 3.3095951047060147e-06,
      "loss": 0.3518,
      "step": 3244
    },
    {
      "epoch": 0.2578,
      "grad_norm": 0.46464601159095764,
      "learning_rate": 3.3063103987735433e-06,
      "loss": 0.3233,
      "step": 3245
    },
    {
      "epoch": 0.258,
      "grad_norm": 0.3920610845088959,
      "learning_rate": 3.30302651832481e-06,
      "loss": 0.2814,
      "step": 3246
    },
    {
      "epoch": 0.2582,
      "grad_norm": 0.5803430676460266,
      "learning_rate": 3.2997434649603368e-06,
      "loss": 0.3287,
      "step": 3247
    },
    {
      "epoch": 0.2584,
      "grad_norm": 0.5503724813461304,
      "learning_rate": 3.2964612402802422e-06,
      "loss": 0.3438,
      "step": 3248
    },
    {
      "epoch": 0.2586,
      "grad_norm": 0.39588066935539246,
      "learning_rate": 3.293179845884245e-06,
      "loss": 0.2983,
      "step": 3249
    },
    {
      "epoch": 0.2588,
      "grad_norm": 0.4400998055934906,
      "learning_rate": 3.289899283371657e-06,
      "loss": 0.3639,
      "step": 3250
    },
    {
      "epoch": 0.259,
      "grad_norm": 0.7116576433181763,
      "learning_rate": 3.2866195543413843e-06,
      "loss": 0.3308,
      "step": 3251
    },
    {
      "epoch": 0.2592,
      "grad_norm": 0.4764927625656128,
      "learning_rate": 3.2833406603919243e-06,
      "loss": 0.332,
      "step": 3252
    },
    {
      "epoch": 0.2594,
      "grad_norm": 0.40691685676574707,
      "learning_rate": 3.280062603121373e-06,
      "loss": 0.3195,
      "step": 3253
    },
    {
      "epoch": 0.2596,
      "grad_norm": 0.4043067693710327,
      "learning_rate": 3.2767853841274154e-06,
      "loss": 0.3244,
      "step": 3254
    },
    {
      "epoch": 0.2598,
      "grad_norm": 0.4259621202945709,
      "learning_rate": 3.273509005007327e-06,
      "loss": 0.3116,
      "step": 3255
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.4232081174850464,
      "learning_rate": 3.2702334673579765e-06,
      "loss": 0.3041,
      "step": 3256
    },
    {
      "epoch": 0.2602,
      "grad_norm": 0.4110788106918335,
      "learning_rate": 3.26695877277582e-06,
      "loss": 0.3256,
      "step": 3257
    },
    {
      "epoch": 0.2604,
      "grad_norm": 0.4261535406112671,
      "learning_rate": 3.263684922856905e-06,
      "loss": 0.3374,
      "step": 3258
    },
    {
      "epoch": 0.2606,
      "grad_norm": 0.44410648941993713,
      "learning_rate": 3.260411919196866e-06,
      "loss": 0.3642,
      "step": 3259
    },
    {
      "epoch": 0.2608,
      "grad_norm": 0.48321714997291565,
      "learning_rate": 3.2571397633909252e-06,
      "loss": 0.3237,
      "step": 3260
    },
    {
      "epoch": 0.261,
      "grad_norm": 0.9209753274917603,
      "learning_rate": 3.2538684570338908e-06,
      "loss": 0.3746,
      "step": 3261
    },
    {
      "epoch": 0.2612,
      "grad_norm": 0.4476669728755951,
      "learning_rate": 3.2505980017201564e-06,
      "loss": 0.3402,
      "step": 3262
    },
    {
      "epoch": 0.2614,
      "grad_norm": 0.4983075261116028,
      "learning_rate": 3.247328399043706e-06,
      "loss": 0.3698,
      "step": 3263
    },
    {
      "epoch": 0.2616,
      "grad_norm": 0.46804481744766235,
      "learning_rate": 3.2440596505981005e-06,
      "loss": 0.3174,
      "step": 3264
    },
    {
      "epoch": 0.2618,
      "grad_norm": 0.46224385499954224,
      "learning_rate": 3.2407917579764914e-06,
      "loss": 0.3295,
      "step": 3265
    },
    {
      "epoch": 0.262,
      "grad_norm": 0.6075366735458374,
      "learning_rate": 3.2375247227716077e-06,
      "loss": 0.337,
      "step": 3266
    },
    {
      "epoch": 0.2622,
      "grad_norm": 0.47221529483795166,
      "learning_rate": 3.2342585465757625e-06,
      "loss": 0.3522,
      "step": 3267
    },
    {
      "epoch": 0.2624,
      "grad_norm": 0.4784944951534271,
      "learning_rate": 3.230993230980853e-06,
      "loss": 0.3574,
      "step": 3268
    },
    {
      "epoch": 0.2626,
      "grad_norm": 0.4861390292644501,
      "learning_rate": 3.227728777578353e-06,
      "loss": 0.3499,
      "step": 3269
    },
    {
      "epoch": 0.2628,
      "grad_norm": 0.4868766963481903,
      "learning_rate": 3.224465187959316e-06,
      "loss": 0.3342,
      "step": 3270
    },
    {
      "epoch": 0.263,
      "grad_norm": 0.38834425806999207,
      "learning_rate": 3.2212024637143756e-06,
      "loss": 0.2976,
      "step": 3271
    },
    {
      "epoch": 0.2632,
      "grad_norm": 0.5407184362411499,
      "learning_rate": 3.217940606433747e-06,
      "loss": 0.2969,
      "step": 3272
    },
    {
      "epoch": 0.2634,
      "grad_norm": 0.5068117380142212,
      "learning_rate": 3.2146796177072183e-06,
      "loss": 0.3223,
      "step": 3273
    },
    {
      "epoch": 0.2636,
      "grad_norm": 0.43453794717788696,
      "learning_rate": 3.211419499124154e-06,
      "loss": 0.3227,
      "step": 3274
    },
    {
      "epoch": 0.2638,
      "grad_norm": 0.679714024066925,
      "learning_rate": 3.2081602522734987e-06,
      "loss": 0.361,
      "step": 3275
    },
    {
      "epoch": 0.264,
      "grad_norm": 0.4594070613384247,
      "learning_rate": 3.2049018787437693e-06,
      "loss": 0.3411,
      "step": 3276
    },
    {
      "epoch": 0.2642,
      "grad_norm": 0.41981422901153564,
      "learning_rate": 3.201644380123056e-06,
      "loss": 0.3434,
      "step": 3277
    },
    {
      "epoch": 0.2644,
      "grad_norm": 0.4653003513813019,
      "learning_rate": 3.1983877579990276e-06,
      "loss": 0.3058,
      "step": 3278
    },
    {
      "epoch": 0.2646,
      "grad_norm": 0.4680175483226776,
      "learning_rate": 3.195132013958918e-06,
      "loss": 0.3463,
      "step": 3279
    },
    {
      "epoch": 0.2648,
      "grad_norm": 0.5456931591033936,
      "learning_rate": 3.1918771495895395e-06,
      "loss": 0.3608,
      "step": 3280
    },
    {
      "epoch": 0.265,
      "grad_norm": 0.4045979976654053,
      "learning_rate": 3.188623166477272e-06,
      "loss": 0.3387,
      "step": 3281
    },
    {
      "epoch": 0.2652,
      "grad_norm": 0.44164782762527466,
      "learning_rate": 3.185370066208069e-06,
      "loss": 0.3276,
      "step": 3282
    },
    {
      "epoch": 0.2654,
      "grad_norm": 0.4605453610420227,
      "learning_rate": 3.1821178503674515e-06,
      "loss": 0.3455,
      "step": 3283
    },
    {
      "epoch": 0.2656,
      "grad_norm": 0.37227174639701843,
      "learning_rate": 3.178866520540509e-06,
      "loss": 0.3308,
      "step": 3284
    },
    {
      "epoch": 0.2658,
      "grad_norm": 0.4966893494129181,
      "learning_rate": 3.1756160783119015e-06,
      "loss": 0.3668,
      "step": 3285
    },
    {
      "epoch": 0.266,
      "grad_norm": 0.4358917474746704,
      "learning_rate": 3.1723665252658564e-06,
      "loss": 0.3076,
      "step": 3286
    },
    {
      "epoch": 0.2662,
      "grad_norm": 0.4465812146663666,
      "learning_rate": 3.169117862986163e-06,
      "loss": 0.336,
      "step": 3287
    },
    {
      "epoch": 0.2664,
      "grad_norm": 0.5170059204101562,
      "learning_rate": 3.16587009305618e-06,
      "loss": 0.3044,
      "step": 3288
    },
    {
      "epoch": 0.2666,
      "grad_norm": 0.5737504959106445,
      "learning_rate": 3.1626232170588343e-06,
      "loss": 0.3187,
      "step": 3289
    },
    {
      "epoch": 0.2668,
      "grad_norm": 0.5212598443031311,
      "learning_rate": 3.1593772365766107e-06,
      "loss": 0.349,
      "step": 3290
    },
    {
      "epoch": 0.267,
      "grad_norm": 0.4405975937843323,
      "learning_rate": 3.1561321531915622e-06,
      "loss": 0.3171,
      "step": 3291
    },
    {
      "epoch": 0.2672,
      "grad_norm": 0.44126468896865845,
      "learning_rate": 3.152887968485303e-06,
      "loss": 0.3254,
      "step": 3292
    },
    {
      "epoch": 0.2674,
      "grad_norm": 0.39868786931037903,
      "learning_rate": 3.149644684039008e-06,
      "loss": 0.3242,
      "step": 3293
    },
    {
      "epoch": 0.2676,
      "grad_norm": 0.4279317855834961,
      "learning_rate": 3.1464023014334164e-06,
      "loss": 0.3055,
      "step": 3294
    },
    {
      "epoch": 0.2678,
      "grad_norm": 0.424373060464859,
      "learning_rate": 3.1431608222488276e-06,
      "loss": 0.3205,
      "step": 3295
    },
    {
      "epoch": 0.268,
      "grad_norm": 0.69850093126297,
      "learning_rate": 3.139920248065095e-06,
      "loss": 0.3141,
      "step": 3296
    },
    {
      "epoch": 0.2682,
      "grad_norm": 0.3811708688735962,
      "learning_rate": 3.1366805804616353e-06,
      "loss": 0.3346,
      "step": 3297
    },
    {
      "epoch": 0.2684,
      "grad_norm": 0.43823280930519104,
      "learning_rate": 3.1334418210174268e-06,
      "loss": 0.3341,
      "step": 3298
    },
    {
      "epoch": 0.2686,
      "grad_norm": 0.516895055770874,
      "learning_rate": 3.130203971310999e-06,
      "loss": 0.3519,
      "step": 3299
    },
    {
      "epoch": 0.2688,
      "grad_norm": 0.5121503472328186,
      "learning_rate": 3.12696703292044e-06,
      "loss": 0.3566,
      "step": 3300
    },
    {
      "epoch": 0.269,
      "grad_norm": 0.38959959149360657,
      "learning_rate": 3.1237310074233964e-06,
      "loss": 0.3358,
      "step": 3301
    },
    {
      "epoch": 0.2692,
      "grad_norm": 0.6207625865936279,
      "learning_rate": 3.1204958963970666e-06,
      "loss": 0.3208,
      "step": 3302
    },
    {
      "epoch": 0.2694,
      "grad_norm": 0.3876122832298279,
      "learning_rate": 3.117261701418204e-06,
      "loss": 0.3263,
      "step": 3303
    },
    {
      "epoch": 0.2696,
      "grad_norm": 0.6662579774856567,
      "learning_rate": 3.114028424063118e-06,
      "loss": 0.3549,
      "step": 3304
    },
    {
      "epoch": 0.0002,
      "grad_norm": 0.3417086601257324,
      "learning_rate": 3.110796065907665e-06,
      "loss": 0.3291,
      "step": 3305
    },
    {
      "epoch": 0.0004,
      "grad_norm": 0.5300449728965759,
      "learning_rate": 3.1075646285272608e-06,
      "loss": 0.3353,
      "step": 3306
    },
    {
      "epoch": 0.0006,
      "grad_norm": 0.45931947231292725,
      "learning_rate": 3.1043341134968653e-06,
      "loss": 0.3697,
      "step": 3307
    },
    {
      "epoch": 0.0008,
      "grad_norm": 0.45020705461502075,
      "learning_rate": 3.1011045223909954e-06,
      "loss": 0.3512,
      "step": 3308
    },
    {
      "epoch": 0.001,
      "grad_norm": 0.6384966373443604,
      "learning_rate": 3.097875856783713e-06,
      "loss": 0.3252,
      "step": 3309
    },
    {
      "epoch": 0.0012,
      "grad_norm": 0.6122851967811584,
      "learning_rate": 3.09464811824863e-06,
      "loss": 0.3138,
      "step": 3310
    },
    {
      "epoch": 0.0014,
      "grad_norm": 0.4463818669319153,
      "learning_rate": 3.0914213083589086e-06,
      "loss": 0.3314,
      "step": 3311
    },
    {
      "epoch": 0.0016,
      "grad_norm": 0.4660632908344269,
      "learning_rate": 3.088195428687254e-06,
      "loss": 0.3505,
      "step": 3312
    },
    {
      "epoch": 0.0018,
      "grad_norm": 0.41439515352249146,
      "learning_rate": 3.0849704808059266e-06,
      "loss": 0.3567,
      "step": 3313
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.48180076479911804,
      "learning_rate": 3.0817464662867192e-06,
      "loss": 0.3379,
      "step": 3314
    },
    {
      "epoch": 0.0022,
      "grad_norm": 0.4690556824207306,
      "learning_rate": 3.078523386700982e-06,
      "loss": 0.3681,
      "step": 3315
    },
    {
      "epoch": 0.0024,
      "grad_norm": 0.54202800989151,
      "learning_rate": 3.0753012436196033e-06,
      "loss": 0.3239,
      "step": 3316
    },
    {
      "epoch": 0.0026,
      "grad_norm": 0.4554392993450165,
      "learning_rate": 3.0720800386130176e-06,
      "loss": 0.3146,
      "step": 3317
    },
    {
      "epoch": 0.0028,
      "grad_norm": 0.6749238967895508,
      "learning_rate": 3.0688597732512004e-06,
      "loss": 0.3653,
      "step": 3318
    },
    {
      "epoch": 0.003,
      "grad_norm": 0.5309857726097107,
      "learning_rate": 3.0656404491036696e-06,
      "loss": 0.3393,
      "step": 3319
    },
    {
      "epoch": 0.0032,
      "grad_norm": 0.4393368363380432,
      "learning_rate": 3.0624220677394854e-06,
      "loss": 0.3536,
      "step": 3320
    },
    {
      "epoch": 0.0034,
      "grad_norm": 0.4020238220691681,
      "learning_rate": 3.059204630727247e-06,
      "loss": 0.3005,
      "step": 3321
    },
    {
      "epoch": 0.0036,
      "grad_norm": 0.4605359435081482,
      "learning_rate": 3.0559881396350967e-06,
      "loss": 0.3484,
      "step": 3322
    },
    {
      "epoch": 0.0038,
      "grad_norm": 0.6430800557136536,
      "learning_rate": 3.0527725960307083e-06,
      "loss": 0.3594,
      "step": 3323
    },
    {
      "epoch": 0.004,
      "grad_norm": 0.4888957440853119,
      "learning_rate": 3.049558001481302e-06,
      "loss": 0.3069,
      "step": 3324
    },
    {
      "epoch": 0.0042,
      "grad_norm": 0.41833147406578064,
      "learning_rate": 3.0463443575536324e-06,
      "loss": 0.3495,
      "step": 3325
    },
    {
      "epoch": 0.0044,
      "grad_norm": 0.4756115674972534,
      "learning_rate": 3.043131665813988e-06,
      "loss": 0.3413,
      "step": 3326
    },
    {
      "epoch": 0.0046,
      "grad_norm": 0.41479527950286865,
      "learning_rate": 3.0399199278281986e-06,
      "loss": 0.3242,
      "step": 3327
    },
    {
      "epoch": 0.0048,
      "grad_norm": 0.6084129214286804,
      "learning_rate": 3.0367091451616254e-06,
      "loss": 0.3299,
      "step": 3328
    },
    {
      "epoch": 0.005,
      "grad_norm": 0.45694082975387573,
      "learning_rate": 3.033499319379163e-06,
      "loss": 0.3343,
      "step": 3329
    },
    {
      "epoch": 0.0052,
      "grad_norm": 0.530125617980957,
      "learning_rate": 3.030290452045245e-06,
      "loss": 0.3361,
      "step": 3330
    },
    {
      "epoch": 0.0054,
      "grad_norm": 0.5801146626472473,
      "learning_rate": 3.0270825447238316e-06,
      "loss": 0.363,
      "step": 3331
    },
    {
      "epoch": 0.0056,
      "grad_norm": 0.43761229515075684,
      "learning_rate": 3.023875598978419e-06,
      "loss": 0.3421,
      "step": 3332
    },
    {
      "epoch": 0.0058,
      "grad_norm": 0.5199820399284363,
      "learning_rate": 3.0206696163720317e-06,
      "loss": 0.3098,
      "step": 3333
    },
    {
      "epoch": 0.006,
      "grad_norm": 0.41703107953071594,
      "learning_rate": 3.0174645984672298e-06,
      "loss": 0.3384,
      "step": 3334
    },
    {
      "epoch": 0.0062,
      "grad_norm": 0.4267464280128479,
      "learning_rate": 3.0142605468260976e-06,
      "loss": 0.3527,
      "step": 3335
    },
    {
      "epoch": 0.0064,
      "grad_norm": 0.6885372996330261,
      "learning_rate": 3.011057463010252e-06,
      "loss": 0.2855,
      "step": 3336
    },
    {
      "epoch": 0.0066,
      "grad_norm": 0.4761621654033661,
      "learning_rate": 3.007855348580837e-06,
      "loss": 0.3056,
      "step": 3337
    },
    {
      "epoch": 0.0068,
      "grad_norm": 0.4726260304450989,
      "learning_rate": 3.004654205098524e-06,
      "loss": 0.3157,
      "step": 3338
    },
    {
      "epoch": 0.007,
      "grad_norm": 0.4167080223560333,
      "learning_rate": 3.001454034123512e-06,
      "loss": 0.3411,
      "step": 3339
    },
    {
      "epoch": 0.0072,
      "grad_norm": 0.6069725155830383,
      "learning_rate": 2.9982548372155264e-06,
      "loss": 0.3745,
      "step": 3340
    },
    {
      "epoch": 0.0074,
      "grad_norm": 0.4863918423652649,
      "learning_rate": 2.9950566159338146e-06,
      "loss": 0.3724,
      "step": 3341
    },
    {
      "epoch": 0.0076,
      "grad_norm": 0.5000196695327759,
      "learning_rate": 2.991859371837151e-06,
      "loss": 0.3441,
      "step": 3342
    },
    {
      "epoch": 0.0078,
      "grad_norm": 0.532791256904602,
      "learning_rate": 2.9886631064838355e-06,
      "loss": 0.3477,
      "step": 3343
    },
    {
      "epoch": 0.008,
      "grad_norm": 0.49018147587776184,
      "learning_rate": 2.9854678214316875e-06,
      "loss": 0.3417,
      "step": 3344
    },
    {
      "epoch": 0.0082,
      "grad_norm": 0.48229849338531494,
      "learning_rate": 2.98227351823805e-06,
      "loss": 0.3351,
      "step": 3345
    },
    {
      "epoch": 0.0084,
      "grad_norm": 0.4256087839603424,
      "learning_rate": 2.9790801984597885e-06,
      "loss": 0.3231,
      "step": 3346
    },
    {
      "epoch": 0.0086,
      "grad_norm": 0.6348013877868652,
      "learning_rate": 2.9758878636532884e-06,
      "loss": 0.3138,
      "step": 3347
    },
    {
      "epoch": 0.0088,
      "grad_norm": 0.4279901683330536,
      "learning_rate": 2.972696515374455e-06,
      "loss": 0.3259,
      "step": 3348
    },
    {
      "epoch": 0.009,
      "grad_norm": 0.47948503494262695,
      "learning_rate": 2.969506155178711e-06,
      "loss": 0.3312,
      "step": 3349
    },
    {
      "epoch": 0.0092,
      "grad_norm": 0.5095896124839783,
      "learning_rate": 2.966316784621e-06,
      "loss": 0.3345,
      "step": 3350
    },
    {
      "epoch": 0.0094,
      "grad_norm": 1.102949857711792,
      "learning_rate": 2.963128405255783e-06,
      "loss": 0.3267,
      "step": 3351
    },
    {
      "epoch": 0.0096,
      "grad_norm": 0.4364752173423767,
      "learning_rate": 2.9599410186370363e-06,
      "loss": 0.3226,
      "step": 3352
    },
    {
      "epoch": 0.0098,
      "grad_norm": 0.3773094713687897,
      "learning_rate": 2.9567546263182554e-06,
      "loss": 0.3251,
      "step": 3353
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5330284833908081,
      "learning_rate": 2.9535692298524477e-06,
      "loss": 0.3056,
      "step": 3354
    },
    {
      "epoch": 0.0102,
      "grad_norm": 0.41424864530563354,
      "learning_rate": 2.9503848307921363e-06,
      "loss": 0.2833,
      "step": 3355
    },
    {
      "epoch": 0.0104,
      "grad_norm": 0.39437752962112427,
      "learning_rate": 2.9472014306893605e-06,
      "loss": 0.3193,
      "step": 3356
    },
    {
      "epoch": 0.0106,
      "grad_norm": 0.505425751209259,
      "learning_rate": 2.94401903109567e-06,
      "loss": 0.3638,
      "step": 3357
    },
    {
      "epoch": 0.0108,
      "grad_norm": 0.43204376101493835,
      "learning_rate": 2.940837633562127e-06,
      "loss": 0.3048,
      "step": 3358
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.4324241578578949,
      "learning_rate": 2.9376572396393047e-06,
      "loss": 0.3164,
      "step": 3359
    },
    {
      "epoch": 0.0112,
      "grad_norm": 0.46588289737701416,
      "learning_rate": 2.934477850877292e-06,
      "loss": 0.3134,
      "step": 3360
    },
    {
      "epoch": 0.0114,
      "grad_norm": 0.4171079993247986,
      "learning_rate": 2.931299468825682e-06,
      "loss": 0.3314,
      "step": 3361
    },
    {
      "epoch": 0.0116,
      "grad_norm": 0.3853009343147278,
      "learning_rate": 2.92812209503358e-06,
      "loss": 0.3239,
      "step": 3362
    },
    {
      "epoch": 0.0118,
      "grad_norm": 0.9680476188659668,
      "learning_rate": 2.9249457310495994e-06,
      "loss": 0.328,
      "step": 3363
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.5430434346199036,
      "learning_rate": 2.921770378421861e-06,
      "loss": 0.3566,
      "step": 3364
    },
    {
      "epoch": 0.0122,
      "grad_norm": 0.4520861506462097,
      "learning_rate": 2.918596038697995e-06,
      "loss": 0.3459,
      "step": 3365
    },
    {
      "epoch": 0.0124,
      "grad_norm": 0.5839866399765015,
      "learning_rate": 2.915422713425134e-06,
      "loss": 0.3219,
      "step": 3366
    },
    {
      "epoch": 0.0126,
      "grad_norm": 0.46124231815338135,
      "learning_rate": 2.912250404149918e-06,
      "loss": 0.367,
      "step": 3367
    },
    {
      "epoch": 0.0128,
      "grad_norm": 0.4196004867553711,
      "learning_rate": 2.9090791124184934e-06,
      "loss": 0.3314,
      "step": 3368
    },
    {
      "epoch": 0.013,
      "grad_norm": 0.39716142416000366,
      "learning_rate": 2.905908839776509e-06,
      "loss": 0.3027,
      "step": 3369
    },
    {
      "epoch": 0.0132,
      "grad_norm": 0.938429057598114,
      "learning_rate": 2.9027395877691143e-06,
      "loss": 0.3269,
      "step": 3370
    },
    {
      "epoch": 0.0134,
      "grad_norm": 0.5183779001235962,
      "learning_rate": 2.899571357940969e-06,
      "loss": 0.3295,
      "step": 3371
    },
    {
      "epoch": 0.0136,
      "grad_norm": 0.6193860769271851,
      "learning_rate": 2.896404151836227e-06,
      "loss": 0.3779,
      "step": 3372
    },
    {
      "epoch": 0.0138,
      "grad_norm": 0.48205330967903137,
      "learning_rate": 2.893237970998547e-06,
      "loss": 0.3407,
      "step": 3373
    },
    {
      "epoch": 0.014,
      "grad_norm": 1.8560532331466675,
      "learning_rate": 2.8900728169710866e-06,
      "loss": 0.362,
      "step": 3374
    },
    {
      "epoch": 0.0142,
      "grad_norm": 0.4499136507511139,
      "learning_rate": 2.886908691296504e-06,
      "loss": 0.3438,
      "step": 3375
    },
    {
      "epoch": 0.0144,
      "grad_norm": 0.5113393664360046,
      "learning_rate": 2.8837455955169547e-06,
      "loss": 0.3345,
      "step": 3376
    },
    {
      "epoch": 0.0146,
      "grad_norm": 0.5178636312484741,
      "learning_rate": 2.8805835311740933e-06,
      "loss": 0.3436,
      "step": 3377
    },
    {
      "epoch": 0.0148,
      "grad_norm": 0.4356480836868286,
      "learning_rate": 2.877422499809072e-06,
      "loss": 0.3086,
      "step": 3378
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.4197707176208496,
      "learning_rate": 2.874262502962537e-06,
      "loss": 0.3216,
      "step": 3379
    },
    {
      "epoch": 0.0152,
      "grad_norm": 0.4820667505264282,
      "learning_rate": 2.871103542174637e-06,
      "loss": 0.3319,
      "step": 3380
    },
    {
      "epoch": 0.0154,
      "grad_norm": 0.4427228271961212,
      "learning_rate": 2.8679456189850076e-06,
      "loss": 0.3419,
      "step": 3381
    },
    {
      "epoch": 0.0156,
      "grad_norm": 0.4847075641155243,
      "learning_rate": 2.864788734932783e-06,
      "loss": 0.3581,
      "step": 3382
    },
    {
      "epoch": 0.0158,
      "grad_norm": 0.48662900924682617,
      "learning_rate": 2.8616328915565907e-06,
      "loss": 0.3525,
      "step": 3383
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.41125011444091797,
      "learning_rate": 2.858478090394549e-06,
      "loss": 0.3622,
      "step": 3384
    },
    {
      "epoch": 0.0162,
      "grad_norm": 0.4061368405818939,
      "learning_rate": 2.8553243329842715e-06,
      "loss": 0.3127,
      "step": 3385
    },
    {
      "epoch": 0.0164,
      "grad_norm": 0.47644758224487305,
      "learning_rate": 2.8521716208628597e-06,
      "loss": 0.321,
      "step": 3386
    },
    {
      "epoch": 0.0166,
      "grad_norm": 0.43386417627334595,
      "learning_rate": 2.849019955566908e-06,
      "loss": 0.3529,
      "step": 3387
    },
    {
      "epoch": 0.0168,
      "grad_norm": 0.3699614703655243,
      "learning_rate": 2.8458693386325e-06,
      "loss": 0.3232,
      "step": 3388
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.48079290986061096,
      "learning_rate": 2.8427197715952047e-06,
      "loss": 0.3584,
      "step": 3389
    },
    {
      "epoch": 0.0172,
      "grad_norm": 0.4768579602241516,
      "learning_rate": 2.839571255990088e-06,
      "loss": 0.3046,
      "step": 3390
    },
    {
      "epoch": 0.0174,
      "grad_norm": 0.4312642812728882,
      "learning_rate": 2.8364237933516964e-06,
      "loss": 0.3383,
      "step": 3391
    },
    {
      "epoch": 0.0176,
      "grad_norm": 0.42464709281921387,
      "learning_rate": 2.8332773852140644e-06,
      "loss": 0.3106,
      "step": 3392
    },
    {
      "epoch": 0.0178,
      "grad_norm": 0.4832788109779358,
      "learning_rate": 2.830132033110713e-06,
      "loss": 0.3226,
      "step": 3393
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.45375314354896545,
      "learning_rate": 2.826987738574649e-06,
      "loss": 0.3097,
      "step": 3394
    },
    {
      "epoch": 0.0182,
      "grad_norm": 0.5469930171966553,
      "learning_rate": 2.8238445031383634e-06,
      "loss": 0.3354,
      "step": 3395
    },
    {
      "epoch": 0.0184,
      "grad_norm": 0.6789397597312927,
      "learning_rate": 2.8207023283338304e-06,
      "loss": 0.3342,
      "step": 3396
    },
    {
      "epoch": 0.0186,
      "grad_norm": 0.4920266568660736,
      "learning_rate": 2.8175612156925082e-06,
      "loss": 0.319,
      "step": 3397
    },
    {
      "epoch": 0.0188,
      "grad_norm": 0.440031498670578,
      "learning_rate": 2.814421166745337e-06,
      "loss": 0.3363,
      "step": 3398
    },
    {
      "epoch": 0.019,
      "grad_norm": 0.4624188244342804,
      "learning_rate": 2.811282183022736e-06,
      "loss": 0.3076,
      "step": 3399
    },
    {
      "epoch": 0.0192,
      "grad_norm": 0.4941563010215759,
      "learning_rate": 2.8081442660546126e-06,
      "loss": 0.355,
      "step": 3400
    },
    {
      "epoch": 0.0194,
      "grad_norm": 0.5168378353118896,
      "learning_rate": 2.805007417370347e-06,
      "loss": 0.33,
      "step": 3401
    },
    {
      "epoch": 0.0196,
      "grad_norm": 0.43909958004951477,
      "learning_rate": 2.8018716384988034e-06,
      "loss": 0.3088,
      "step": 3402
    },
    {
      "epoch": 0.0198,
      "grad_norm": 0.41070765256881714,
      "learning_rate": 2.798736930968315e-06,
      "loss": 0.336,
      "step": 3403
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4840502440929413,
      "learning_rate": 2.795603296306708e-06,
      "loss": 0.3008,
      "step": 3404
    },
    {
      "epoch": 0.0202,
      "grad_norm": 0.3868803083896637,
      "learning_rate": 2.7924707360412743e-06,
      "loss": 0.3079,
      "step": 3405
    },
    {
      "epoch": 0.0204,
      "grad_norm": 0.417752206325531,
      "learning_rate": 2.7893392516987873e-06,
      "loss": 0.3305,
      "step": 3406
    },
    {
      "epoch": 0.0206,
      "grad_norm": 0.4529922902584076,
      "learning_rate": 2.7862088448054936e-06,
      "loss": 0.344,
      "step": 3407
    },
    {
      "epoch": 0.0208,
      "grad_norm": 0.5844095945358276,
      "learning_rate": 2.7830795168871127e-06,
      "loss": 0.3144,
      "step": 3408
    },
    {
      "epoch": 0.021,
      "grad_norm": 0.43798694014549255,
      "learning_rate": 2.779951269468847e-06,
      "loss": 0.3325,
      "step": 3409
    },
    {
      "epoch": 0.0212,
      "grad_norm": 0.6590871214866638,
      "learning_rate": 2.776824104075364e-06,
      "loss": 0.3409,
      "step": 3410
    },
    {
      "epoch": 0.0214,
      "grad_norm": 0.44794657826423645,
      "learning_rate": 2.7736980222308042e-06,
      "loss": 0.3213,
      "step": 3411
    },
    {
      "epoch": 0.0216,
      "grad_norm": 0.4490557909011841,
      "learning_rate": 2.7705730254587802e-06,
      "loss": 0.3405,
      "step": 3412
    },
    {
      "epoch": 0.0218,
      "grad_norm": 0.4930865168571472,
      "learning_rate": 2.7674491152823825e-06,
      "loss": 0.3542,
      "step": 3413
    },
    {
      "epoch": 0.022,
      "grad_norm": 0.7336243987083435,
      "learning_rate": 2.7643262932241642e-06,
      "loss": 0.3441,
      "step": 3414
    },
    {
      "epoch": 0.0222,
      "grad_norm": 0.495273232460022,
      "learning_rate": 2.761204560806152e-06,
      "loss": 0.3248,
      "step": 3415
    },
    {
      "epoch": 0.0224,
      "grad_norm": 0.4444357752799988,
      "learning_rate": 2.7580839195498397e-06,
      "loss": 0.3555,
      "step": 3416
    },
    {
      "epoch": 0.0226,
      "grad_norm": 0.4423377811908722,
      "learning_rate": 2.75496437097619e-06,
      "loss": 0.3523,
      "step": 3417
    },
    {
      "epoch": 0.0228,
      "grad_norm": 0.5495564937591553,
      "learning_rate": 2.75184591660563e-06,
      "loss": 0.3359,
      "step": 3418
    },
    {
      "epoch": 0.023,
      "grad_norm": 0.48251861333847046,
      "learning_rate": 2.7487285579580635e-06,
      "loss": 0.3106,
      "step": 3419
    },
    {
      "epoch": 0.0232,
      "grad_norm": 0.5753570199012756,
      "learning_rate": 2.7456122965528475e-06,
      "loss": 0.3538,
      "step": 3420
    },
    {
      "epoch": 0.0234,
      "grad_norm": 0.41920551657676697,
      "learning_rate": 2.742497133908812e-06,
      "loss": 0.3227,
      "step": 3421
    },
    {
      "epoch": 0.0236,
      "grad_norm": 0.6996517777442932,
      "learning_rate": 2.739383071544246e-06,
      "loss": 0.3311,
      "step": 3422
    },
    {
      "epoch": 0.0238,
      "grad_norm": 0.5361956357955933,
      "learning_rate": 2.736270110976912e-06,
      "loss": 0.3594,
      "step": 3423
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.5597459077835083,
      "learning_rate": 2.7331582537240243e-06,
      "loss": 0.3429,
      "step": 3424
    },
    {
      "epoch": 0.0242,
      "grad_norm": 0.44189727306365967,
      "learning_rate": 2.7300475013022666e-06,
      "loss": 0.3457,
      "step": 3425
    },
    {
      "epoch": 0.0244,
      "grad_norm": 0.39592963457107544,
      "learning_rate": 2.726937855227781e-06,
      "loss": 0.3149,
      "step": 3426
    },
    {
      "epoch": 0.0246,
      "grad_norm": 0.3928285539150238,
      "learning_rate": 2.723829317016169e-06,
      "loss": 0.3408,
      "step": 3427
    },
    {
      "epoch": 0.0248,
      "grad_norm": 0.4529823362827301,
      "learning_rate": 2.7207218881825016e-06,
      "loss": 0.3353,
      "step": 3428
    },
    {
      "epoch": 0.025,
      "grad_norm": 0.5789965391159058,
      "learning_rate": 2.717615570241294e-06,
      "loss": 0.3702,
      "step": 3429
    },
    {
      "epoch": 0.0252,
      "grad_norm": 0.5200404524803162,
      "learning_rate": 2.714510364706531e-06,
      "loss": 0.3282,
      "step": 3430
    },
    {
      "epoch": 0.0254,
      "grad_norm": 0.41471487283706665,
      "learning_rate": 2.7114062730916513e-06,
      "loss": 0.3187,
      "step": 3431
    },
    {
      "epoch": 0.0256,
      "grad_norm": 0.4557587802410126,
      "learning_rate": 2.708303296909551e-06,
      "loss": 0.3156,
      "step": 3432
    },
    {
      "epoch": 0.0258,
      "grad_norm": 0.6695243120193481,
      "learning_rate": 2.705201437672585e-06,
      "loss": 0.3516,
      "step": 3433
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.5438603758811951,
      "learning_rate": 2.7021006968925613e-06,
      "loss": 0.3347,
      "step": 3434
    },
    {
      "epoch": 0.0262,
      "grad_norm": 0.4305674135684967,
      "learning_rate": 2.699001076080742e-06,
      "loss": 0.3292,
      "step": 3435
    },
    {
      "epoch": 0.0264,
      "grad_norm": 0.5096269249916077,
      "learning_rate": 2.6959025767478466e-06,
      "loss": 0.3278,
      "step": 3436
    },
    {
      "epoch": 0.0266,
      "grad_norm": 0.5151652097702026,
      "learning_rate": 2.692805200404044e-06,
      "loss": 0.3274,
      "step": 3437
    },
    {
      "epoch": 0.0268,
      "grad_norm": 0.5499328374862671,
      "learning_rate": 2.6897089485589584e-06,
      "loss": 0.3266,
      "step": 3438
    },
    {
      "epoch": 0.027,
      "grad_norm": 0.4079466760158539,
      "learning_rate": 2.686613822721666e-06,
      "loss": 0.3552,
      "step": 3439
    },
    {
      "epoch": 0.0272,
      "grad_norm": 0.41292351484298706,
      "learning_rate": 2.683519824400693e-06,
      "loss": 0.3538,
      "step": 3440
    },
    {
      "epoch": 0.0274,
      "grad_norm": 0.5678034424781799,
      "learning_rate": 2.680426955104014e-06,
      "loss": 0.3582,
      "step": 3441
    },
    {
      "epoch": 0.0276,
      "grad_norm": 0.5599619150161743,
      "learning_rate": 2.677335216339062e-06,
      "loss": 0.3364,
      "step": 3442
    },
    {
      "epoch": 0.0278,
      "grad_norm": 0.5763043761253357,
      "learning_rate": 2.6742446096127086e-06,
      "loss": 0.3116,
      "step": 3443
    },
    {
      "epoch": 0.028,
      "grad_norm": 0.43107983469963074,
      "learning_rate": 2.671155136431279e-06,
      "loss": 0.3405,
      "step": 3444
    },
    {
      "epoch": 0.0282,
      "grad_norm": 0.5595728158950806,
      "learning_rate": 2.6680667983005446e-06,
      "loss": 0.3319,
      "step": 3445
    },
    {
      "epoch": 0.0284,
      "grad_norm": 0.4000933766365051,
      "learning_rate": 2.6649795967257243e-06,
      "loss": 0.3234,
      "step": 3446
    },
    {
      "epoch": 0.0286,
      "grad_norm": 0.4503725469112396,
      "learning_rate": 2.661893533211482e-06,
      "loss": 0.3232,
      "step": 3447
    },
    {
      "epoch": 0.0288,
      "grad_norm": 0.48208072781562805,
      "learning_rate": 2.658808609261928e-06,
      "loss": 0.3355,
      "step": 3448
    },
    {
      "epoch": 0.029,
      "grad_norm": 0.5025602579116821,
      "learning_rate": 2.6557248263806175e-06,
      "loss": 0.3508,
      "step": 3449
    },
    {
      "epoch": 0.0292,
      "grad_norm": 0.42113032937049866,
      "learning_rate": 2.6526421860705474e-06,
      "loss": 0.3399,
      "step": 3450
    },
    {
      "epoch": 0.0294,
      "grad_norm": 0.4435068666934967,
      "learning_rate": 2.649560689834158e-06,
      "loss": 0.3506,
      "step": 3451
    },
    {
      "epoch": 0.0296,
      "grad_norm": 0.43500810861587524,
      "learning_rate": 2.646480339173337e-06,
      "loss": 0.3079,
      "step": 3452
    },
    {
      "epoch": 0.0298,
      "grad_norm": 0.7357662320137024,
      "learning_rate": 2.6434011355894074e-06,
      "loss": 0.3428,
      "step": 3453
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.40943044424057007,
      "learning_rate": 2.640323080583137e-06,
      "loss": 0.2978,
      "step": 3454
    },
    {
      "epoch": 0.0302,
      "grad_norm": 0.4741467833518982,
      "learning_rate": 2.637246175654731e-06,
      "loss": 0.3652,
      "step": 3455
    },
    {
      "epoch": 0.0304,
      "grad_norm": 0.4472401738166809,
      "learning_rate": 2.634170422303835e-06,
      "loss": 0.3159,
      "step": 3456
    },
    {
      "epoch": 0.0306,
      "grad_norm": 0.46161511540412903,
      "learning_rate": 2.6310958220295356e-06,
      "loss": 0.3338,
      "step": 3457
    },
    {
      "epoch": 0.0308,
      "grad_norm": 0.4246075451374054,
      "learning_rate": 2.6280223763303546e-06,
      "loss": 0.3308,
      "step": 3458
    },
    {
      "epoch": 0.031,
      "grad_norm": 0.4218193590641022,
      "learning_rate": 2.6249500867042523e-06,
      "loss": 0.3142,
      "step": 3459
    },
    {
      "epoch": 0.0312,
      "grad_norm": 0.47292882204055786,
      "learning_rate": 2.6218789546486235e-06,
      "loss": 0.3496,
      "step": 3460
    },
    {
      "epoch": 0.0314,
      "grad_norm": 0.42150601744651794,
      "learning_rate": 2.618808981660304e-06,
      "loss": 0.3254,
      "step": 3461
    },
    {
      "epoch": 0.0316,
      "grad_norm": 0.42035576701164246,
      "learning_rate": 2.61574016923556e-06,
      "loss": 0.332,
      "step": 3462
    },
    {
      "epoch": 0.0318,
      "grad_norm": 0.44265711307525635,
      "learning_rate": 2.612672518870093e-06,
      "loss": 0.35,
      "step": 3463
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.3781023621559143,
      "learning_rate": 2.6096060320590393e-06,
      "loss": 0.336,
      "step": 3464
    },
    {
      "epoch": 0.0322,
      "grad_norm": 0.4664776921272278,
      "learning_rate": 2.6065407102969664e-06,
      "loss": 0.3308,
      "step": 3465
    },
    {
      "epoch": 0.0324,
      "grad_norm": 0.4754885733127594,
      "learning_rate": 2.6034765550778753e-06,
      "loss": 0.3418,
      "step": 3466
    },
    {
      "epoch": 0.0326,
      "grad_norm": 0.4392938017845154,
      "learning_rate": 2.600413567895198e-06,
      "loss": 0.3546,
      "step": 3467
    },
    {
      "epoch": 0.0328,
      "grad_norm": 0.6278036236763,
      "learning_rate": 2.5973517502417966e-06,
      "loss": 0.3508,
      "step": 3468
    },
    {
      "epoch": 0.033,
      "grad_norm": 0.5580092072486877,
      "learning_rate": 2.5942911036099657e-06,
      "loss": 0.3793,
      "step": 3469
    },
    {
      "epoch": 0.0332,
      "grad_norm": 0.4012444317340851,
      "learning_rate": 2.5912316294914232e-06,
      "loss": 0.3483,
      "step": 3470
    },
    {
      "epoch": 0.0334,
      "grad_norm": 0.43740880489349365,
      "learning_rate": 2.588173329377324e-06,
      "loss": 0.3721,
      "step": 3471
    },
    {
      "epoch": 0.0336,
      "grad_norm": 0.49290379881858826,
      "learning_rate": 2.5851162047582477e-06,
      "loss": 0.3644,
      "step": 3472
    },
    {
      "epoch": 0.0338,
      "grad_norm": 0.4045817255973816,
      "learning_rate": 2.582060257124195e-06,
      "loss": 0.3082,
      "step": 3473
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.4119875729084015,
      "learning_rate": 2.5790054879645964e-06,
      "loss": 0.3698,
      "step": 3474
    },
    {
      "epoch": 0.0342,
      "grad_norm": 0.46459081768989563,
      "learning_rate": 2.5759518987683154e-06,
      "loss": 0.3687,
      "step": 3475
    },
    {
      "epoch": 0.0344,
      "grad_norm": 0.48892372846603394,
      "learning_rate": 2.5728994910236304e-06,
      "loss": 0.3341,
      "step": 3476
    },
    {
      "epoch": 0.0346,
      "grad_norm": 0.6295551061630249,
      "learning_rate": 2.5698482662182494e-06,
      "loss": 0.3571,
      "step": 3477
    },
    {
      "epoch": 0.0348,
      "grad_norm": 0.6210313439369202,
      "learning_rate": 2.5667982258393016e-06,
      "loss": 0.3686,
      "step": 3478
    },
    {
      "epoch": 0.035,
      "grad_norm": 0.5108376741409302,
      "learning_rate": 2.5637493713733376e-06,
      "loss": 0.3228,
      "step": 3479
    },
    {
      "epoch": 0.0352,
      "grad_norm": 0.48439666628837585,
      "learning_rate": 2.560701704306336e-06,
      "loss": 0.3511,
      "step": 3480
    },
    {
      "epoch": 0.0354,
      "grad_norm": 0.43855172395706177,
      "learning_rate": 2.557655226123693e-06,
      "loss": 0.3467,
      "step": 3481
    },
    {
      "epoch": 0.0356,
      "grad_norm": 0.45930927991867065,
      "learning_rate": 2.5546099383102206e-06,
      "loss": 0.3447,
      "step": 3482
    },
    {
      "epoch": 0.0358,
      "grad_norm": 0.5378358960151672,
      "learning_rate": 2.5515658423501573e-06,
      "loss": 0.3633,
      "step": 3483
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.5684463381767273,
      "learning_rate": 2.5485229397271567e-06,
      "loss": 0.3616,
      "step": 3484
    },
    {
      "epoch": 0.0362,
      "grad_norm": 0.4006313383579254,
      "learning_rate": 2.545481231924296e-06,
      "loss": 0.3261,
      "step": 3485
    },
    {
      "epoch": 0.0364,
      "grad_norm": 0.8860622644424438,
      "learning_rate": 2.5424407204240653e-06,
      "loss": 0.3205,
      "step": 3486
    },
    {
      "epoch": 0.0366,
      "grad_norm": 0.7361705303192139,
      "learning_rate": 2.539401406708373e-06,
      "loss": 0.3152,
      "step": 3487
    },
    {
      "epoch": 0.0368,
      "grad_norm": 0.65683913230896,
      "learning_rate": 2.536363292258543e-06,
      "loss": 0.3432,
      "step": 3488
    },
    {
      "epoch": 0.037,
      "grad_norm": 0.424513041973114,
      "learning_rate": 2.533326378555314e-06,
      "loss": 0.358,
      "step": 3489
    },
    {
      "epoch": 0.0372,
      "grad_norm": 0.5015997290611267,
      "learning_rate": 2.5302906670788463e-06,
      "loss": 0.3217,
      "step": 3490
    },
    {
      "epoch": 0.0374,
      "grad_norm": 0.4386586844921112,
      "learning_rate": 2.527256159308703e-06,
      "loss": 0.3421,
      "step": 3491
    },
    {
      "epoch": 0.0376,
      "grad_norm": 0.5025037527084351,
      "learning_rate": 2.524222856723869e-06,
      "loss": 0.3393,
      "step": 3492
    },
    {
      "epoch": 0.0378,
      "grad_norm": 0.49238622188568115,
      "learning_rate": 2.5211907608027366e-06,
      "loss": 0.332,
      "step": 3493
    },
    {
      "epoch": 0.038,
      "grad_norm": 0.41748538613319397,
      "learning_rate": 2.518159873023116e-06,
      "loss": 0.3163,
      "step": 3494
    },
    {
      "epoch": 0.0382,
      "grad_norm": 0.46947070956230164,
      "learning_rate": 2.5151301948622235e-06,
      "loss": 0.3498,
      "step": 3495
    },
    {
      "epoch": 0.0384,
      "grad_norm": 0.4513198733329773,
      "learning_rate": 2.5121017277966875e-06,
      "loss": 0.3557,
      "step": 3496
    },
    {
      "epoch": 0.0386,
      "grad_norm": 0.4403994083404541,
      "learning_rate": 2.509074473302546e-06,
      "loss": 0.3362,
      "step": 3497
    },
    {
      "epoch": 0.0388,
      "grad_norm": 0.41966721415519714,
      "learning_rate": 2.506048432855247e-06,
      "loss": 0.3307,
      "step": 3498
    },
    {
      "epoch": 0.039,
      "grad_norm": 0.387683242559433,
      "learning_rate": 2.5030236079296443e-06,
      "loss": 0.3343,
      "step": 3499
    },
    {
      "epoch": 0.0392,
      "grad_norm": 0.5010925531387329,
      "learning_rate": 2.5000000000000015e-06,
      "loss": 0.3368,
      "step": 3500
    },
    {
      "epoch": 0.0394,
      "grad_norm": 0.6690516471862793,
      "learning_rate": 2.496977610539988e-06,
      "loss": 0.3318,
      "step": 3501
    },
    {
      "epoch": 0.0396,
      "grad_norm": 0.426057368516922,
      "learning_rate": 2.49395644102268e-06,
      "loss": 0.3466,
      "step": 3502
    },
    {
      "epoch": 0.0398,
      "grad_norm": 0.4596046507358551,
      "learning_rate": 2.4909364929205575e-06,
      "loss": 0.3237,
      "step": 3503
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5733073353767395,
      "learning_rate": 2.48791776770551e-06,
      "loss": 0.3183,
      "step": 3504
    },
    {
      "epoch": 0.0402,
      "grad_norm": 0.46242615580558777,
      "learning_rate": 2.484900266848825e-06,
      "loss": 0.3492,
      "step": 3505
    },
    {
      "epoch": 0.0404,
      "grad_norm": 0.4486474394798279,
      "learning_rate": 2.4818839918211963e-06,
      "loss": 0.3037,
      "step": 3506
    },
    {
      "epoch": 0.0406,
      "grad_norm": 0.41522443294525146,
      "learning_rate": 2.4788689440927193e-06,
      "loss": 0.32,
      "step": 3507
    },
    {
      "epoch": 0.0408,
      "grad_norm": 1.4446535110473633,
      "learning_rate": 2.4758551251328923e-06,
      "loss": 0.3434,
      "step": 3508
    },
    {
      "epoch": 0.041,
      "grad_norm": 0.4862390458583832,
      "learning_rate": 2.4728425364106136e-06,
      "loss": 0.3425,
      "step": 3509
    },
    {
      "epoch": 0.0412,
      "grad_norm": 0.4635170102119446,
      "learning_rate": 2.469831179394182e-06,
      "loss": 0.327,
      "step": 3510
    },
    {
      "epoch": 0.0414,
      "grad_norm": 0.4097064733505249,
      "learning_rate": 2.4668210555512974e-06,
      "loss": 0.3309,
      "step": 3511
    },
    {
      "epoch": 0.0416,
      "grad_norm": 0.4228954613208771,
      "learning_rate": 2.4638121663490546e-06,
      "loss": 0.3201,
      "step": 3512
    },
    {
      "epoch": 0.0418,
      "grad_norm": 0.3871912956237793,
      "learning_rate": 2.4608045132539536e-06,
      "loss": 0.3414,
      "step": 3513
    },
    {
      "epoch": 0.042,
      "grad_norm": 0.45945706963539124,
      "learning_rate": 2.4577980977318866e-06,
      "loss": 0.336,
      "step": 3514
    },
    {
      "epoch": 0.0422,
      "grad_norm": 0.606484055519104,
      "learning_rate": 2.4547929212481436e-06,
      "loss": 0.2902,
      "step": 3515
    },
    {
      "epoch": 0.0424,
      "grad_norm": 0.5311645269393921,
      "learning_rate": 2.4517889852674114e-06,
      "loss": 0.3369,
      "step": 3516
    },
    {
      "epoch": 0.0426,
      "grad_norm": 0.3727840781211853,
      "learning_rate": 2.448786291253772e-06,
      "loss": 0.3206,
      "step": 3517
    },
    {
      "epoch": 0.0428,
      "grad_norm": 0.5079643130302429,
      "learning_rate": 2.4457848406707014e-06,
      "loss": 0.3569,
      "step": 3518
    },
    {
      "epoch": 0.043,
      "grad_norm": 0.48289650678634644,
      "learning_rate": 2.442784634981071e-06,
      "loss": 0.317,
      "step": 3519
    },
    {
      "epoch": 0.0432,
      "grad_norm": 0.4501470923423767,
      "learning_rate": 2.4397856756471435e-06,
      "loss": 0.3208,
      "step": 3520
    },
    {
      "epoch": 0.0434,
      "grad_norm": 0.41722118854522705,
      "learning_rate": 2.4367879641305757e-06,
      "loss": 0.3284,
      "step": 3521
    },
    {
      "epoch": 0.0436,
      "grad_norm": 0.4725840091705322,
      "learning_rate": 2.4337915018924147e-06,
      "loss": 0.3398,
      "step": 3522
    },
    {
      "epoch": 0.0438,
      "grad_norm": 0.4678660035133362,
      "learning_rate": 2.4307962903931025e-06,
      "loss": 0.3614,
      "step": 3523
    },
    {
      "epoch": 0.044,
      "grad_norm": 0.5066635012626648,
      "learning_rate": 2.4278023310924676e-06,
      "loss": 0.3434,
      "step": 3524
    },
    {
      "epoch": 0.0442,
      "grad_norm": 0.493000864982605,
      "learning_rate": 2.424809625449729e-06,
      "loss": 0.3436,
      "step": 3525
    },
    {
      "epoch": 0.0444,
      "grad_norm": 0.5127754807472229,
      "learning_rate": 2.4218181749234954e-06,
      "loss": 0.3625,
      "step": 3526
    },
    {
      "epoch": 0.0446,
      "grad_norm": 0.579617977142334,
      "learning_rate": 2.418827980971763e-06,
      "loss": 0.3169,
      "step": 3527
    },
    {
      "epoch": 0.0448,
      "grad_norm": 0.3908967077732086,
      "learning_rate": 2.415839045051916e-06,
      "loss": 0.2988,
      "step": 3528
    },
    {
      "epoch": 0.045,
      "grad_norm": 0.3703458905220032,
      "learning_rate": 2.412851368620726e-06,
      "loss": 0.3461,
      "step": 3529
    },
    {
      "epoch": 0.0452,
      "grad_norm": 0.42432889342308044,
      "learning_rate": 2.40986495313435e-06,
      "loss": 0.3289,
      "step": 3530
    },
    {
      "epoch": 0.0454,
      "grad_norm": 0.5582854151725769,
      "learning_rate": 2.4068798000483306e-06,
      "loss": 0.3276,
      "step": 3531
    },
    {
      "epoch": 0.0456,
      "grad_norm": 0.4227972626686096,
      "learning_rate": 2.403895910817593e-06,
      "loss": 0.3226,
      "step": 3532
    },
    {
      "epoch": 0.0458,
      "grad_norm": 0.466214120388031,
      "learning_rate": 2.4009132868964525e-06,
      "loss": 0.3288,
      "step": 3533
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.44743287563323975,
      "learning_rate": 2.3979319297386035e-06,
      "loss": 0.3392,
      "step": 3534
    },
    {
      "epoch": 0.0462,
      "grad_norm": 0.5131268501281738,
      "learning_rate": 2.39495184079712e-06,
      "loss": 0.3531,
      "step": 3535
    },
    {
      "epoch": 0.0464,
      "grad_norm": 0.5148587226867676,
      "learning_rate": 2.391973021524461e-06,
      "loss": 0.301,
      "step": 3536
    },
    {
      "epoch": 0.0466,
      "grad_norm": 0.9175434112548828,
      "learning_rate": 2.3889954733724708e-06,
      "loss": 0.3067,
      "step": 3537
    },
    {
      "epoch": 0.0468,
      "grad_norm": 0.39549851417541504,
      "learning_rate": 2.3860191977923673e-06,
      "loss": 0.3039,
      "step": 3538
    },
    {
      "epoch": 0.047,
      "grad_norm": 0.4303092956542969,
      "learning_rate": 2.3830441962347528e-06,
      "loss": 0.3274,
      "step": 3539
    },
    {
      "epoch": 0.0472,
      "grad_norm": 0.6006001234054565,
      "learning_rate": 2.380070470149605e-06,
      "loss": 0.3438,
      "step": 3540
    },
    {
      "epoch": 0.0474,
      "grad_norm": 0.4413866400718689,
      "learning_rate": 2.3770980209862814e-06,
      "loss": 0.3033,
      "step": 3541
    },
    {
      "epoch": 0.0476,
      "grad_norm": 0.5924910306930542,
      "learning_rate": 2.3741268501935212e-06,
      "loss": 0.3403,
      "step": 3542
    },
    {
      "epoch": 0.0478,
      "grad_norm": 0.5586074590682983,
      "learning_rate": 2.3711569592194363e-06,
      "loss": 0.3333,
      "step": 3543
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.42020246386528015,
      "learning_rate": 2.3681883495115114e-06,
      "loss": 0.3508,
      "step": 3544
    },
    {
      "epoch": 0.0482,
      "grad_norm": 0.36452654004096985,
      "learning_rate": 2.3652210225166122e-06,
      "loss": 0.2987,
      "step": 3545
    },
    {
      "epoch": 0.0484,
      "grad_norm": 0.48348623514175415,
      "learning_rate": 2.3622549796809807e-06,
      "loss": 0.3447,
      "step": 3546
    },
    {
      "epoch": 0.0486,
      "grad_norm": 0.5375544428825378,
      "learning_rate": 2.3592902224502284e-06,
      "loss": 0.3364,
      "step": 3547
    },
    {
      "epoch": 0.0488,
      "grad_norm": 0.46575313806533813,
      "learning_rate": 2.356326752269342e-06,
      "loss": 0.3449,
      "step": 3548
    },
    {
      "epoch": 0.049,
      "grad_norm": 0.7992476224899292,
      "learning_rate": 2.353364570582681e-06,
      "loss": 0.3627,
      "step": 3549
    },
    {
      "epoch": 0.0492,
      "grad_norm": 0.9565929770469666,
      "learning_rate": 2.3504036788339763e-06,
      "loss": 0.3403,
      "step": 3550
    },
    {
      "epoch": 0.0494,
      "grad_norm": 0.4585828483104706,
      "learning_rate": 2.3474440784663287e-06,
      "loss": 0.3494,
      "step": 3551
    },
    {
      "epoch": 0.0496,
      "grad_norm": 0.42233529686927795,
      "learning_rate": 2.344485770922218e-06,
      "loss": 0.3262,
      "step": 3552
    },
    {
      "epoch": 0.0498,
      "grad_norm": 0.5208999514579773,
      "learning_rate": 2.3415287576434807e-06,
      "loss": 0.3282,
      "step": 3553
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5148120522499084,
      "learning_rate": 2.338573040071332e-06,
      "loss": 0.3436,
      "step": 3554
    },
    {
      "epoch": 0.0502,
      "grad_norm": 0.42887723445892334,
      "learning_rate": 2.3356186196463497e-06,
      "loss": 0.3276,
      "step": 3555
    },
    {
      "epoch": 0.0504,
      "grad_norm": 0.6544498205184937,
      "learning_rate": 2.3326654978084872e-06,
      "loss": 0.3327,
      "step": 3556
    },
    {
      "epoch": 0.0506,
      "grad_norm": 0.49056872725486755,
      "learning_rate": 2.329713675997058e-06,
      "loss": 0.3563,
      "step": 3557
    },
    {
      "epoch": 0.0508,
      "grad_norm": 0.42531266808509827,
      "learning_rate": 2.3267631556507443e-06,
      "loss": 0.3338,
      "step": 3558
    },
    {
      "epoch": 0.051,
      "grad_norm": 0.47536543011665344,
      "learning_rate": 2.323813938207593e-06,
      "loss": 0.35,
      "step": 3559
    },
    {
      "epoch": 0.0512,
      "grad_norm": 0.45244020223617554,
      "learning_rate": 2.320866025105016e-06,
      "loss": 0.3221,
      "step": 3560
    },
    {
      "epoch": 0.0514,
      "grad_norm": 0.5683878660202026,
      "learning_rate": 2.3179194177797954e-06,
      "loss": 0.3831,
      "step": 3561
    },
    {
      "epoch": 0.0516,
      "grad_norm": 0.4703058898448944,
      "learning_rate": 2.3149741176680666e-06,
      "loss": 0.3399,
      "step": 3562
    },
    {
      "epoch": 0.0518,
      "grad_norm": 0.45234057307243347,
      "learning_rate": 2.312030126205335e-06,
      "loss": 0.3516,
      "step": 3563
    },
    {
      "epoch": 0.052,
      "grad_norm": 0.4383130669593811,
      "learning_rate": 2.309087444826464e-06,
      "loss": 0.3258,
      "step": 3564
    },
    {
      "epoch": 0.0522,
      "grad_norm": 0.4220123291015625,
      "learning_rate": 2.3061460749656844e-06,
      "loss": 0.3374,
      "step": 3565
    },
    {
      "epoch": 0.0524,
      "grad_norm": 0.4449930489063263,
      "learning_rate": 2.303206018056583e-06,
      "loss": 0.36,
      "step": 3566
    },
    {
      "epoch": 0.0526,
      "grad_norm": 1.4542874097824097,
      "learning_rate": 2.3002672755321076e-06,
      "loss": 0.3563,
      "step": 3567
    },
    {
      "epoch": 0.0528,
      "grad_norm": 0.3846484422683716,
      "learning_rate": 2.297329848824565e-06,
      "loss": 0.3405,
      "step": 3568
    },
    {
      "epoch": 0.053,
      "grad_norm": 0.4591273367404938,
      "learning_rate": 2.294393739365621e-06,
      "loss": 0.3481,
      "step": 3569
    },
    {
      "epoch": 0.0532,
      "grad_norm": 0.4784490466117859,
      "learning_rate": 2.2914589485863015e-06,
      "loss": 0.3411,
      "step": 3570
    },
    {
      "epoch": 0.0534,
      "grad_norm": 0.44150230288505554,
      "learning_rate": 2.288525477916986e-06,
      "loss": 0.3236,
      "step": 3571
    },
    {
      "epoch": 0.0536,
      "grad_norm": 0.5765684843063354,
      "learning_rate": 2.285593328787414e-06,
      "loss": 0.3438,
      "step": 3572
    },
    {
      "epoch": 0.0538,
      "grad_norm": 0.4575710892677307,
      "learning_rate": 2.282662502626678e-06,
      "loss": 0.3293,
      "step": 3573
    },
    {
      "epoch": 0.054,
      "grad_norm": 0.41363170742988586,
      "learning_rate": 2.2797330008632255e-06,
      "loss": 0.3053,
      "step": 3574
    },
    {
      "epoch": 0.0542,
      "grad_norm": 0.7142214179039001,
      "learning_rate": 2.2768048249248648e-06,
      "loss": 0.3512,
      "step": 3575
    },
    {
      "epoch": 0.0544,
      "grad_norm": 0.46187251806259155,
      "learning_rate": 2.27387797623875e-06,
      "loss": 0.3368,
      "step": 3576
    },
    {
      "epoch": 0.0546,
      "grad_norm": 0.747895359992981,
      "learning_rate": 2.2709524562313923e-06,
      "loss": 0.3237,
      "step": 3577
    },
    {
      "epoch": 0.0548,
      "grad_norm": 0.44880735874176025,
      "learning_rate": 2.268028266328655e-06,
      "loss": 0.3281,
      "step": 3578
    },
    {
      "epoch": 0.055,
      "grad_norm": 0.4160294234752655,
      "learning_rate": 2.265105407955752e-06,
      "loss": 0.3122,
      "step": 3579
    },
    {
      "epoch": 0.0552,
      "grad_norm": 0.40512046217918396,
      "learning_rate": 2.2621838825372496e-06,
      "loss": 0.3231,
      "step": 3580
    },
    {
      "epoch": 0.0554,
      "grad_norm": 0.46785733103752136,
      "learning_rate": 2.2592636914970633e-06,
      "loss": 0.3311,
      "step": 3581
    },
    {
      "epoch": 0.0556,
      "grad_norm": 0.49979597330093384,
      "learning_rate": 2.256344836258459e-06,
      "loss": 0.3245,
      "step": 3582
    },
    {
      "epoch": 0.0558,
      "grad_norm": 0.6598399877548218,
      "learning_rate": 2.2534273182440515e-06,
      "loss": 0.3445,
      "step": 3583
    },
    {
      "epoch": 0.056,
      "grad_norm": 0.4144494831562042,
      "learning_rate": 2.250511138875801e-06,
      "loss": 0.3103,
      "step": 3584
    },
    {
      "epoch": 0.0562,
      "grad_norm": 0.621495246887207,
      "learning_rate": 2.2475962995750224e-06,
      "loss": 0.3381,
      "step": 3585
    },
    {
      "epoch": 0.0564,
      "grad_norm": 0.44165757298469543,
      "learning_rate": 2.24468280176237e-06,
      "loss": 0.3695,
      "step": 3586
    },
    {
      "epoch": 0.0566,
      "grad_norm": 0.3552791476249695,
      "learning_rate": 2.2417706468578495e-06,
      "loss": 0.3099,
      "step": 3587
    },
    {
      "epoch": 0.0568,
      "grad_norm": 0.5630825161933899,
      "learning_rate": 2.2388598362808074e-06,
      "loss": 0.3495,
      "step": 3588
    },
    {
      "epoch": 0.057,
      "grad_norm": 0.48521244525909424,
      "learning_rate": 2.235950371449938e-06,
      "loss": 0.3413,
      "step": 3589
    },
    {
      "epoch": 0.0572,
      "grad_norm": 0.627043604850769,
      "learning_rate": 2.23304225378328e-06,
      "loss": 0.3751,
      "step": 3590
    },
    {
      "epoch": 0.0574,
      "grad_norm": 0.39764416217803955,
      "learning_rate": 2.2301354846982148e-06,
      "loss": 0.336,
      "step": 3591
    },
    {
      "epoch": 0.0576,
      "grad_norm": 0.42784228920936584,
      "learning_rate": 2.2272300656114648e-06,
      "loss": 0.3154,
      "step": 3592
    },
    {
      "epoch": 0.0578,
      "grad_norm": 0.43941164016723633,
      "learning_rate": 2.224325997939095e-06,
      "loss": 0.3414,
      "step": 3593
    },
    {
      "epoch": 0.058,
      "grad_norm": 0.5032743215560913,
      "learning_rate": 2.221423283096517e-06,
      "loss": 0.363,
      "step": 3594
    },
    {
      "epoch": 0.0582,
      "grad_norm": 0.48117393255233765,
      "learning_rate": 2.218521922498476e-06,
      "loss": 0.3176,
      "step": 3595
    },
    {
      "epoch": 0.0584,
      "grad_norm": 0.6013593673706055,
      "learning_rate": 2.2156219175590623e-06,
      "loss": 0.3304,
      "step": 3596
    },
    {
      "epoch": 0.0586,
      "grad_norm": 0.4156055450439453,
      "learning_rate": 2.212723269691697e-06,
      "loss": 0.3229,
      "step": 3597
    },
    {
      "epoch": 0.0588,
      "grad_norm": 0.8366389870643616,
      "learning_rate": 2.209825980309151e-06,
      "loss": 0.3592,
      "step": 3598
    },
    {
      "epoch": 0.059,
      "grad_norm": 0.4285179078578949,
      "learning_rate": 2.2069300508235273e-06,
      "loss": 0.3114,
      "step": 3599
    },
    {
      "epoch": 0.0592,
      "grad_norm": 0.46617671847343445,
      "learning_rate": 2.204035482646267e-06,
      "loss": 0.3165,
      "step": 3600
    },
    {
      "epoch": 0.0594,
      "grad_norm": 0.495372474193573,
      "learning_rate": 2.201142277188146e-06,
      "loss": 0.3078,
      "step": 3601
    },
    {
      "epoch": 0.0596,
      "grad_norm": 0.42072397470474243,
      "learning_rate": 2.1982504358592777e-06,
      "loss": 0.3183,
      "step": 3602
    },
    {
      "epoch": 0.0598,
      "grad_norm": 0.49814656376838684,
      "learning_rate": 2.19535996006911e-06,
      "loss": 0.3507,
      "step": 3603
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5412299633026123,
      "learning_rate": 2.192470851226428e-06,
      "loss": 0.3301,
      "step": 3604
    },
    {
      "epoch": 0.0602,
      "grad_norm": 0.3983375132083893,
      "learning_rate": 2.1895831107393485e-06,
      "loss": 0.3398,
      "step": 3605
    },
    {
      "epoch": 0.0604,
      "grad_norm": 0.4437698721885681,
      "learning_rate": 2.1866967400153184e-06,
      "loss": 0.3099,
      "step": 3606
    },
    {
      "epoch": 0.0606,
      "grad_norm": 0.4280250370502472,
      "learning_rate": 2.183811740461118e-06,
      "loss": 0.3084,
      "step": 3607
    },
    {
      "epoch": 0.0608,
      "grad_norm": 0.5017187595367432,
      "learning_rate": 2.1809281134828663e-06,
      "loss": 0.3331,
      "step": 3608
    },
    {
      "epoch": 0.061,
      "grad_norm": 0.4714476466178894,
      "learning_rate": 2.1780458604860056e-06,
      "loss": 0.3354,
      "step": 3609
    },
    {
      "epoch": 0.0612,
      "grad_norm": 0.5742354393005371,
      "learning_rate": 2.175164982875311e-06,
      "loss": 0.3245,
      "step": 3610
    },
    {
      "epoch": 0.0614,
      "grad_norm": 0.47686928510665894,
      "learning_rate": 2.1722854820548873e-06,
      "loss": 0.3334,
      "step": 3611
    },
    {
      "epoch": 0.0616,
      "grad_norm": 0.3971303403377533,
      "learning_rate": 2.1694073594281663e-06,
      "loss": 0.3459,
      "step": 3612
    },
    {
      "epoch": 0.0618,
      "grad_norm": 0.5260752439498901,
      "learning_rate": 2.1665306163979132e-06,
      "loss": 0.3316,
      "step": 3613
    },
    {
      "epoch": 0.062,
      "grad_norm": 1.0677480697631836,
      "learning_rate": 2.1636552543662187e-06,
      "loss": 0.3312,
      "step": 3614
    },
    {
      "epoch": 0.0622,
      "grad_norm": 0.39851805567741394,
      "learning_rate": 2.1607812747344955e-06,
      "loss": 0.3272,
      "step": 3615
    },
    {
      "epoch": 0.0624,
      "grad_norm": 0.405396431684494,
      "learning_rate": 2.157908678903487e-06,
      "loss": 0.3238,
      "step": 3616
    },
    {
      "epoch": 0.0626,
      "grad_norm": 0.482421875,
      "learning_rate": 2.1550374682732605e-06,
      "loss": 0.3206,
      "step": 3617
    },
    {
      "epoch": 0.0628,
      "grad_norm": 0.4469122886657715,
      "learning_rate": 2.152167644243213e-06,
      "loss": 0.3474,
      "step": 3618
    },
    {
      "epoch": 0.063,
      "grad_norm": 0.5237732529640198,
      "learning_rate": 2.14929920821206e-06,
      "loss": 0.3229,
      "step": 3619
    },
    {
      "epoch": 0.0632,
      "grad_norm": 0.4422334134578705,
      "learning_rate": 2.146432161577842e-06,
      "loss": 0.335,
      "step": 3620
    },
    {
      "epoch": 0.0634,
      "grad_norm": 0.48463571071624756,
      "learning_rate": 2.1435665057379233e-06,
      "loss": 0.3445,
      "step": 3621
    },
    {
      "epoch": 0.0636,
      "grad_norm": 0.5597214102745056,
      "learning_rate": 2.140702242088987e-06,
      "loss": 0.308,
      "step": 3622
    },
    {
      "epoch": 0.0638,
      "grad_norm": 0.49647748470306396,
      "learning_rate": 2.137839372027047e-06,
      "loss": 0.3521,
      "step": 3623
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.7483525276184082,
      "learning_rate": 2.134977896947425e-06,
      "loss": 0.3538,
      "step": 3624
    },
    {
      "epoch": 0.0642,
      "grad_norm": 0.540168285369873,
      "learning_rate": 2.132117818244771e-06,
      "loss": 0.2932,
      "step": 3625
    },
    {
      "epoch": 0.0644,
      "grad_norm": 0.5095276832580566,
      "learning_rate": 2.1292591373130515e-06,
      "loss": 0.3304,
      "step": 3626
    },
    {
      "epoch": 0.0646,
      "grad_norm": 0.563732922077179,
      "learning_rate": 2.1264018555455563e-06,
      "loss": 0.3658,
      "step": 3627
    },
    {
      "epoch": 0.0648,
      "grad_norm": 0.5215808153152466,
      "learning_rate": 2.1235459743348874e-06,
      "loss": 0.3423,
      "step": 3628
    },
    {
      "epoch": 0.065,
      "grad_norm": 0.441879004240036,
      "learning_rate": 2.1206914950729673e-06,
      "loss": 0.3174,
      "step": 3629
    },
    {
      "epoch": 0.0652,
      "grad_norm": 0.4713680148124695,
      "learning_rate": 2.1178384191510344e-06,
      "loss": 0.3212,
      "step": 3630
    },
    {
      "epoch": 0.0654,
      "grad_norm": 0.46990421414375305,
      "learning_rate": 2.114986747959643e-06,
      "loss": 0.3153,
      "step": 3631
    },
    {
      "epoch": 0.0656,
      "grad_norm": 0.5332986116409302,
      "learning_rate": 2.112136482888663e-06,
      "loss": 0.3601,
      "step": 3632
    },
    {
      "epoch": 0.0658,
      "grad_norm": 0.4816812574863434,
      "learning_rate": 2.1092876253272793e-06,
      "loss": 0.3489,
      "step": 3633
    },
    {
      "epoch": 0.066,
      "grad_norm": 0.457678884267807,
      "learning_rate": 2.10644017666399e-06,
      "loss": 0.3434,
      "step": 3634
    },
    {
      "epoch": 0.0662,
      "grad_norm": 0.45846670866012573,
      "learning_rate": 2.103594138286607e-06,
      "loss": 0.3535,
      "step": 3635
    },
    {
      "epoch": 0.0664,
      "grad_norm": 0.5330760478973389,
      "learning_rate": 2.100749511582254e-06,
      "loss": 0.3233,
      "step": 3636
    },
    {
      "epoch": 0.0666,
      "grad_norm": 0.49907106161117554,
      "learning_rate": 2.09790629793737e-06,
      "loss": 0.3333,
      "step": 3637
    },
    {
      "epoch": 0.0668,
      "grad_norm": 0.4328741431236267,
      "learning_rate": 2.095064498737701e-06,
      "loss": 0.3311,
      "step": 3638
    },
    {
      "epoch": 0.067,
      "grad_norm": 0.4330531060695648,
      "learning_rate": 2.0922241153683064e-06,
      "loss": 0.3196,
      "step": 3639
    },
    {
      "epoch": 0.0672,
      "grad_norm": 0.5992621779441833,
      "learning_rate": 2.0893851492135536e-06,
      "loss": 0.3262,
      "step": 3640
    },
    {
      "epoch": 0.0674,
      "grad_norm": 0.4189988970756531,
      "learning_rate": 2.0865476016571206e-06,
      "loss": 0.324,
      "step": 3641
    },
    {
      "epoch": 0.0676,
      "grad_norm": 0.46317434310913086,
      "learning_rate": 2.083711474081993e-06,
      "loss": 0.3424,
      "step": 3642
    },
    {
      "epoch": 0.0678,
      "grad_norm": 0.40380099415779114,
      "learning_rate": 2.080876767870466e-06,
      "loss": 0.3121,
      "step": 3643
    },
    {
      "epoch": 0.068,
      "grad_norm": 0.4796511232852936,
      "learning_rate": 2.07804348440414e-06,
      "loss": 0.347,
      "step": 3644
    },
    {
      "epoch": 0.0682,
      "grad_norm": 0.471316933631897,
      "learning_rate": 2.075211625063923e-06,
      "loss": 0.3386,
      "step": 3645
    },
    {
      "epoch": 0.0684,
      "grad_norm": 0.4071754813194275,
      "learning_rate": 2.0723811912300295e-06,
      "loss": 0.3129,
      "step": 3646
    },
    {
      "epoch": 0.0686,
      "grad_norm": 0.4569539427757263,
      "learning_rate": 2.0695521842819788e-06,
      "loss": 0.3285,
      "step": 3647
    },
    {
      "epoch": 0.0688,
      "grad_norm": 0.5148619413375854,
      "learning_rate": 2.066724605598594e-06,
      "loss": 0.3331,
      "step": 3648
    },
    {
      "epoch": 0.069,
      "grad_norm": 0.6910401582717896,
      "learning_rate": 2.063898456558002e-06,
      "loss": 0.3699,
      "step": 3649
    },
    {
      "epoch": 0.0692,
      "grad_norm": 0.4877273738384247,
      "learning_rate": 2.061073738537635e-06,
      "loss": 0.3207,
      "step": 3650
    },
    {
      "epoch": 0.0694,
      "grad_norm": 1.1270793676376343,
      "learning_rate": 2.0582504529142248e-06,
      "loss": 0.3092,
      "step": 3651
    },
    {
      "epoch": 0.0696,
      "grad_norm": 0.4164164960384369,
      "learning_rate": 2.0554286010638076e-06,
      "loss": 0.3543,
      "step": 3652
    },
    {
      "epoch": 0.0698,
      "grad_norm": 0.6247628927230835,
      "learning_rate": 2.0526081843617183e-06,
      "loss": 0.327,
      "step": 3653
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.4372577965259552,
      "learning_rate": 2.049789204182596e-06,
      "loss": 0.3345,
      "step": 3654
    },
    {
      "epoch": 0.0702,
      "grad_norm": 0.39212697744369507,
      "learning_rate": 2.046971661900373e-06,
      "loss": 0.3071,
      "step": 3655
    },
    {
      "epoch": 0.0704,
      "grad_norm": 0.5134119391441345,
      "learning_rate": 2.04415555888829e-06,
      "loss": 0.3196,
      "step": 3656
    },
    {
      "epoch": 0.0706,
      "grad_norm": 0.3690548837184906,
      "learning_rate": 2.04134089651888e-06,
      "loss": 0.3111,
      "step": 3657
    },
    {
      "epoch": 0.0708,
      "grad_norm": 0.6839541792869568,
      "learning_rate": 2.0385276761639768e-06,
      "loss": 0.3241,
      "step": 3658
    },
    {
      "epoch": 0.071,
      "grad_norm": 0.6222180724143982,
      "learning_rate": 2.035715899194704e-06,
      "loss": 0.3342,
      "step": 3659
    },
    {
      "epoch": 0.0712,
      "grad_norm": 5.458777904510498,
      "learning_rate": 2.0329055669814936e-06,
      "loss": 0.3182,
      "step": 3660
    },
    {
      "epoch": 0.0714,
      "grad_norm": 0.5853318572044373,
      "learning_rate": 2.030096680894065e-06,
      "loss": 0.3317,
      "step": 3661
    },
    {
      "epoch": 0.0716,
      "grad_norm": 0.4050430655479431,
      "learning_rate": 2.027289242301435e-06,
      "loss": 0.3423,
      "step": 3662
    },
    {
      "epoch": 0.0718,
      "grad_norm": 0.5086203217506409,
      "learning_rate": 2.0244832525719155e-06,
      "loss": 0.3384,
      "step": 3663
    },
    {
      "epoch": 0.072,
      "grad_norm": 0.6329050064086914,
      "learning_rate": 2.02167871307311e-06,
      "loss": 0.3425,
      "step": 3664
    },
    {
      "epoch": 0.0722,
      "grad_norm": 0.5085400938987732,
      "learning_rate": 2.0188756251719204e-06,
      "loss": 0.3562,
      "step": 3665
    },
    {
      "epoch": 0.0724,
      "grad_norm": 0.5617877840995789,
      "learning_rate": 2.016073990234536e-06,
      "loss": 0.3455,
      "step": 3666
    },
    {
      "epoch": 0.0726,
      "grad_norm": 0.49785006046295166,
      "learning_rate": 2.0132738096264415e-06,
      "loss": 0.3399,
      "step": 3667
    },
    {
      "epoch": 0.0728,
      "grad_norm": 0.4524686932563782,
      "learning_rate": 2.0104750847124075e-06,
      "loss": 0.3407,
      "step": 3668
    },
    {
      "epoch": 0.073,
      "grad_norm": 0.40998414158821106,
      "learning_rate": 2.007677816856498e-06,
      "loss": 0.3577,
      "step": 3669
    },
    {
      "epoch": 0.0732,
      "grad_norm": 0.5640161037445068,
      "learning_rate": 2.0048820074220716e-06,
      "loss": 0.3407,
      "step": 3670
    },
    {
      "epoch": 0.0734,
      "grad_norm": 0.5198290944099426,
      "learning_rate": 2.002087657771769e-06,
      "loss": 0.3216,
      "step": 3671
    },
    {
      "epoch": 0.0736,
      "grad_norm": 0.429722398519516,
      "learning_rate": 1.999294769267523e-06,
      "loss": 0.3713,
      "step": 3672
    },
    {
      "epoch": 0.0738,
      "grad_norm": 0.5868404507637024,
      "learning_rate": 1.996503343270554e-06,
      "loss": 0.3412,
      "step": 3673
    },
    {
      "epoch": 0.074,
      "grad_norm": 0.4765988886356354,
      "learning_rate": 1.9937133811413666e-06,
      "loss": 0.3639,
      "step": 3674
    },
    {
      "epoch": 0.0742,
      "grad_norm": 0.45428621768951416,
      "learning_rate": 1.990924884239758e-06,
      "loss": 0.3405,
      "step": 3675
    },
    {
      "epoch": 0.0744,
      "grad_norm": 0.6045084595680237,
      "learning_rate": 1.988137853924808e-06,
      "loss": 0.3114,
      "step": 3676
    },
    {
      "epoch": 0.0746,
      "grad_norm": 0.5369136333465576,
      "learning_rate": 1.9853522915548777e-06,
      "loss": 0.3118,
      "step": 3677
    },
    {
      "epoch": 0.0748,
      "grad_norm": 0.5271263718605042,
      "learning_rate": 1.9825681984876173e-06,
      "loss": 0.3307,
      "step": 3678
    },
    {
      "epoch": 0.075,
      "grad_norm": 0.41690707206726074,
      "learning_rate": 1.979785576079961e-06,
      "loss": 0.3363,
      "step": 3679
    },
    {
      "epoch": 0.0752,
      "grad_norm": 0.7158898115158081,
      "learning_rate": 1.977004425688126e-06,
      "loss": 0.3454,
      "step": 3680
    },
    {
      "epoch": 0.0754,
      "grad_norm": 0.4594719409942627,
      "learning_rate": 1.97422474866761e-06,
      "loss": 0.3455,
      "step": 3681
    },
    {
      "epoch": 0.0756,
      "grad_norm": 0.6789016723632812,
      "learning_rate": 1.9714465463731934e-06,
      "loss": 0.3131,
      "step": 3682
    },
    {
      "epoch": 0.0758,
      "grad_norm": 0.4973239600658417,
      "learning_rate": 1.9686698201589395e-06,
      "loss": 0.3549,
      "step": 3683
    },
    {
      "epoch": 0.076,
      "grad_norm": 0.5333765745162964,
      "learning_rate": 1.9658945713781883e-06,
      "loss": 0.3125,
      "step": 3684
    },
    {
      "epoch": 0.0762,
      "grad_norm": 0.4966120719909668,
      "learning_rate": 1.9631208013835677e-06,
      "loss": 0.374,
      "step": 3685
    },
    {
      "epoch": 0.0764,
      "grad_norm": 0.39230912923812866,
      "learning_rate": 1.9603485115269743e-06,
      "loss": 0.3104,
      "step": 3686
    },
    {
      "epoch": 0.0766,
      "grad_norm": 0.587860643863678,
      "learning_rate": 1.9575777031595906e-06,
      "loss": 0.3192,
      "step": 3687
    },
    {
      "epoch": 0.0768,
      "grad_norm": 0.42578378319740295,
      "learning_rate": 1.9548083776318727e-06,
      "loss": 0.3432,
      "step": 3688
    },
    {
      "epoch": 0.077,
      "grad_norm": 0.41351011395454407,
      "learning_rate": 1.95204053629356e-06,
      "loss": 0.3376,
      "step": 3689
    },
    {
      "epoch": 0.0772,
      "grad_norm": 0.532352864742279,
      "learning_rate": 1.9492741804936623e-06,
      "loss": 0.3383,
      "step": 3690
    },
    {
      "epoch": 0.0774,
      "grad_norm": 0.625684916973114,
      "learning_rate": 1.946509311580469e-06,
      "loss": 0.3105,
      "step": 3691
    },
    {
      "epoch": 0.0776,
      "grad_norm": 0.42924821376800537,
      "learning_rate": 1.9437459309015426e-06,
      "loss": 0.3487,
      "step": 3692
    },
    {
      "epoch": 0.0778,
      "grad_norm": 0.5408510565757751,
      "learning_rate": 1.94098403980372e-06,
      "loss": 0.3392,
      "step": 3693
    },
    {
      "epoch": 0.078,
      "grad_norm": 0.40344226360321045,
      "learning_rate": 1.938223639633119e-06,
      "loss": 0.3235,
      "step": 3694
    },
    {
      "epoch": 0.0782,
      "grad_norm": 0.440017431974411,
      "learning_rate": 1.9354647317351187e-06,
      "loss": 0.3397,
      "step": 3695
    },
    {
      "epoch": 0.0784,
      "grad_norm": 0.8418397903442383,
      "learning_rate": 1.93270731745438e-06,
      "loss": 0.3345,
      "step": 3696
    },
    {
      "epoch": 0.0786,
      "grad_norm": 0.46864762902259827,
      "learning_rate": 1.929951398134832e-06,
      "loss": 0.3511,
      "step": 3697
    },
    {
      "epoch": 0.0788,
      "grad_norm": 0.5253691673278809,
      "learning_rate": 1.927196975119678e-06,
      "loss": 0.3179,
      "step": 3698
    },
    {
      "epoch": 0.079,
      "grad_norm": 0.4592877924442291,
      "learning_rate": 1.9244440497513895e-06,
      "loss": 0.3675,
      "step": 3699
    },
    {
      "epoch": 0.0792,
      "grad_norm": 0.4816204607486725,
      "learning_rate": 1.9216926233717087e-06,
      "loss": 0.3398,
      "step": 3700
    },
    {
      "epoch": 0.0794,
      "grad_norm": 0.4038502275943756,
      "learning_rate": 1.9189426973216478e-06,
      "loss": 0.3329,
      "step": 3701
    },
    {
      "epoch": 0.0796,
      "grad_norm": 0.4497540593147278,
      "learning_rate": 1.9161942729414876e-06,
      "loss": 0.3292,
      "step": 3702
    },
    {
      "epoch": 0.0798,
      "grad_norm": 0.44770821928977966,
      "learning_rate": 1.913447351570776e-06,
      "loss": 0.3416,
      "step": 3703
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.699696958065033,
      "learning_rate": 1.910701934548329e-06,
      "loss": 0.3075,
      "step": 3704
    },
    {
      "epoch": 0.0802,
      "grad_norm": 0.5490803122520447,
      "learning_rate": 1.90795802321223e-06,
      "loss": 0.3301,
      "step": 3705
    },
    {
      "epoch": 0.0804,
      "grad_norm": 0.4633817970752716,
      "learning_rate": 1.9052156188998284e-06,
      "loss": 0.3221,
      "step": 3706
    },
    {
      "epoch": 0.0806,
      "grad_norm": 0.4292963445186615,
      "learning_rate": 1.9024747229477365e-06,
      "loss": 0.3455,
      "step": 3707
    },
    {
      "epoch": 0.0808,
      "grad_norm": 0.4817541241645813,
      "learning_rate": 1.8997353366918369e-06,
      "loss": 0.3115,
      "step": 3708
    },
    {
      "epoch": 0.081,
      "grad_norm": 0.36195480823516846,
      "learning_rate": 1.896997461467272e-06,
      "loss": 0.3,
      "step": 3709
    },
    {
      "epoch": 0.0812,
      "grad_norm": 0.476612389087677,
      "learning_rate": 1.8942610986084487e-06,
      "loss": 0.3322,
      "step": 3710
    },
    {
      "epoch": 0.0814,
      "grad_norm": 0.5624911189079285,
      "learning_rate": 1.8915262494490366e-06,
      "loss": 0.3218,
      "step": 3711
    },
    {
      "epoch": 0.0816,
      "grad_norm": 0.9110454320907593,
      "learning_rate": 1.8887929153219687e-06,
      "loss": 0.2947,
      "step": 3712
    },
    {
      "epoch": 0.0818,
      "grad_norm": 0.464977890253067,
      "learning_rate": 1.8860610975594384e-06,
      "loss": 0.3249,
      "step": 3713
    },
    {
      "epoch": 0.082,
      "grad_norm": 0.4697114825248718,
      "learning_rate": 1.8833307974929006e-06,
      "loss": 0.3233,
      "step": 3714
    },
    {
      "epoch": 0.0822,
      "grad_norm": 0.4538310170173645,
      "learning_rate": 1.8806020164530702e-06,
      "loss": 0.3302,
      "step": 3715
    },
    {
      "epoch": 0.0824,
      "grad_norm": 0.6603257060050964,
      "learning_rate": 1.8778747557699223e-06,
      "loss": 0.3596,
      "step": 3716
    },
    {
      "epoch": 0.0826,
      "grad_norm": 0.5746002197265625,
      "learning_rate": 1.8751490167726888e-06,
      "loss": 0.3458,
      "step": 3717
    },
    {
      "epoch": 0.0828,
      "grad_norm": 0.393028199672699,
      "learning_rate": 1.8724248007898648e-06,
      "loss": 0.3294,
      "step": 3718
    },
    {
      "epoch": 0.083,
      "grad_norm": 0.4958423972129822,
      "learning_rate": 1.8697021091491991e-06,
      "loss": 0.3436,
      "step": 3719
    },
    {
      "epoch": 0.0832,
      "grad_norm": 0.6519724726676941,
      "learning_rate": 1.8669809431776991e-06,
      "loss": 0.3264,
      "step": 3720
    },
    {
      "epoch": 0.0834,
      "grad_norm": 0.4901706576347351,
      "learning_rate": 1.8642613042016245e-06,
      "loss": 0.3346,
      "step": 3721
    },
    {
      "epoch": 0.0836,
      "grad_norm": 0.3884117007255554,
      "learning_rate": 1.8615431935464984e-06,
      "loss": 0.319,
      "step": 3722
    },
    {
      "epoch": 0.0838,
      "grad_norm": 0.4568096399307251,
      "learning_rate": 1.8588266125370929e-06,
      "loss": 0.2998,
      "step": 3723
    },
    {
      "epoch": 0.084,
      "grad_norm": 0.4341413378715515,
      "learning_rate": 1.8561115624974374e-06,
      "loss": 0.3309,
      "step": 3724
    },
    {
      "epoch": 0.0842,
      "grad_norm": 0.7823662757873535,
      "learning_rate": 1.8533980447508138e-06,
      "loss": 0.3198,
      "step": 3725
    },
    {
      "epoch": 0.0844,
      "grad_norm": 0.44850727915763855,
      "learning_rate": 1.8506860606197564e-06,
      "loss": 0.2975,
      "step": 3726
    },
    {
      "epoch": 0.0846,
      "grad_norm": 0.5522760152816772,
      "learning_rate": 1.8479756114260562e-06,
      "loss": 0.361,
      "step": 3727
    },
    {
      "epoch": 0.0848,
      "grad_norm": 0.5106461644172668,
      "learning_rate": 1.8452666984907519e-06,
      "loss": 0.3422,
      "step": 3728
    },
    {
      "epoch": 0.085,
      "grad_norm": 0.572500467300415,
      "learning_rate": 1.842559323134136e-06,
      "loss": 0.3625,
      "step": 3729
    },
    {
      "epoch": 0.0852,
      "grad_norm": 0.5495997667312622,
      "learning_rate": 1.8398534866757455e-06,
      "loss": 0.3289,
      "step": 3730
    },
    {
      "epoch": 0.0854,
      "grad_norm": 0.44004616141319275,
      "learning_rate": 1.837149190434378e-06,
      "loss": 0.3402,
      "step": 3731
    },
    {
      "epoch": 0.0856,
      "grad_norm": 0.4754388630390167,
      "learning_rate": 1.8344464357280722e-06,
      "loss": 0.3336,
      "step": 3732
    },
    {
      "epoch": 0.0858,
      "grad_norm": 0.7308005094528198,
      "learning_rate": 1.831745223874118e-06,
      "loss": 0.3294,
      "step": 3733
    },
    {
      "epoch": 0.086,
      "grad_norm": 0.45640450716018677,
      "learning_rate": 1.829045556189053e-06,
      "loss": 0.3271,
      "step": 3734
    },
    {
      "epoch": 0.0862,
      "grad_norm": 0.5122544765472412,
      "learning_rate": 1.8263474339886628e-06,
      "loss": 0.3381,
      "step": 3735
    },
    {
      "epoch": 0.0864,
      "grad_norm": 0.509671151638031,
      "learning_rate": 1.8236508585879781e-06,
      "loss": 0.3521,
      "step": 3736
    },
    {
      "epoch": 0.0866,
      "grad_norm": 0.48822125792503357,
      "learning_rate": 1.8209558313012792e-06,
      "loss": 0.3793,
      "step": 3737
    },
    {
      "epoch": 0.0868,
      "grad_norm": 0.5146298408508301,
      "learning_rate": 1.8182623534420906e-06,
      "loss": 0.3056,
      "step": 3738
    },
    {
      "epoch": 0.087,
      "grad_norm": 0.39960938692092896,
      "learning_rate": 1.8155704263231777e-06,
      "loss": 0.3342,
      "step": 3739
    },
    {
      "epoch": 0.0872,
      "grad_norm": 0.4074355959892273,
      "learning_rate": 1.8128800512565514e-06,
      "loss": 0.307,
      "step": 3740
    },
    {
      "epoch": 0.0874,
      "grad_norm": 1.8441591262817383,
      "learning_rate": 1.810191229553473e-06,
      "loss": 0.3557,
      "step": 3741
    },
    {
      "epoch": 0.0876,
      "grad_norm": 0.5311644673347473,
      "learning_rate": 1.807503962524439e-06,
      "loss": 0.3257,
      "step": 3742
    },
    {
      "epoch": 0.0878,
      "grad_norm": 0.43107205629348755,
      "learning_rate": 1.8048182514791901e-06,
      "loss": 0.3123,
      "step": 3743
    },
    {
      "epoch": 0.088,
      "grad_norm": 0.4140705466270447,
      "learning_rate": 1.8021340977267104e-06,
      "loss": 0.3127,
      "step": 3744
    },
    {
      "epoch": 0.0882,
      "grad_norm": 0.7029069662094116,
      "learning_rate": 1.799451502575222e-06,
      "loss": 0.3304,
      "step": 3745
    },
    {
      "epoch": 0.0884,
      "grad_norm": 0.43894535303115845,
      "learning_rate": 1.7967704673321917e-06,
      "loss": 0.3399,
      "step": 3746
    },
    {
      "epoch": 0.0886,
      "grad_norm": 0.6873098611831665,
      "learning_rate": 1.7940909933043243e-06,
      "loss": 0.3206,
      "step": 3747
    },
    {
      "epoch": 0.0888,
      "grad_norm": 0.6070961356163025,
      "learning_rate": 1.7914130817975595e-06,
      "loss": 0.3326,
      "step": 3748
    },
    {
      "epoch": 0.089,
      "grad_norm": 0.5053484439849854,
      "learning_rate": 1.7887367341170781e-06,
      "loss": 0.336,
      "step": 3749
    },
    {
      "epoch": 0.0892,
      "grad_norm": 0.4257158637046814,
      "learning_rate": 1.7860619515673034e-06,
      "loss": 0.3242,
      "step": 3750
    },
    {
      "epoch": 0.0894,
      "grad_norm": 0.4256298542022705,
      "learning_rate": 1.7833887354518902e-06,
      "loss": 0.3426,
      "step": 3751
    },
    {
      "epoch": 0.0896,
      "grad_norm": 0.48420238494873047,
      "learning_rate": 1.7807170870737317e-06,
      "loss": 0.3409,
      "step": 3752
    },
    {
      "epoch": 0.0898,
      "grad_norm": 0.42872220277786255,
      "learning_rate": 1.7780470077349566e-06,
      "loss": 0.3387,
      "step": 3753
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.49937427043914795,
      "learning_rate": 1.7753784987369287e-06,
      "loss": 0.322,
      "step": 3754
    },
    {
      "epoch": 0.0902,
      "grad_norm": 0.39801791310310364,
      "learning_rate": 1.7727115613802465e-06,
      "loss": 0.3195,
      "step": 3755
    },
    {
      "epoch": 0.0904,
      "grad_norm": 0.6078960299491882,
      "learning_rate": 1.770046196964747e-06,
      "loss": 0.3455,
      "step": 3756
    },
    {
      "epoch": 0.0906,
      "grad_norm": 0.4142525792121887,
      "learning_rate": 1.7673824067894912e-06,
      "loss": 0.332,
      "step": 3757
    },
    {
      "epoch": 0.0908,
      "grad_norm": 0.4727976322174072,
      "learning_rate": 1.7647201921527802e-06,
      "loss": 0.3314,
      "step": 3758
    },
    {
      "epoch": 0.091,
      "grad_norm": 0.4947437047958374,
      "learning_rate": 1.762059554352143e-06,
      "loss": 0.3655,
      "step": 3759
    },
    {
      "epoch": 0.0912,
      "grad_norm": 0.4620342552661896,
      "learning_rate": 1.7594004946843458e-06,
      "loss": 0.3213,
      "step": 3760
    },
    {
      "epoch": 0.0914,
      "grad_norm": 0.4346334934234619,
      "learning_rate": 1.7567430144453801e-06,
      "loss": 0.319,
      "step": 3761
    },
    {
      "epoch": 0.0916,
      "grad_norm": 0.4668121337890625,
      "learning_rate": 1.75408711493047e-06,
      "loss": 0.3194,
      "step": 3762
    },
    {
      "epoch": 0.0918,
      "grad_norm": 0.4844874441623688,
      "learning_rate": 1.751432797434068e-06,
      "loss": 0.3354,
      "step": 3763
    },
    {
      "epoch": 0.092,
      "grad_norm": 0.7091814279556274,
      "learning_rate": 1.7487800632498547e-06,
      "loss": 0.3194,
      "step": 3764
    },
    {
      "epoch": 0.0922,
      "grad_norm": 0.5031542778015137,
      "learning_rate": 1.746128913670746e-06,
      "loss": 0.351,
      "step": 3765
    },
    {
      "epoch": 0.0924,
      "grad_norm": 0.6244412660598755,
      "learning_rate": 1.7434793499888746e-06,
      "loss": 0.3246,
      "step": 3766
    },
    {
      "epoch": 0.0926,
      "grad_norm": 0.5106151103973389,
      "learning_rate": 1.7408313734956074e-06,
      "loss": 0.3394,
      "step": 3767
    },
    {
      "epoch": 0.0928,
      "grad_norm": 0.45128801465034485,
      "learning_rate": 1.738184985481536e-06,
      "loss": 0.3243,
      "step": 3768
    },
    {
      "epoch": 0.093,
      "grad_norm": 0.5087855458259583,
      "learning_rate": 1.7355401872364759e-06,
      "loss": 0.3298,
      "step": 3769
    },
    {
      "epoch": 0.0932,
      "grad_norm": 0.47103574872016907,
      "learning_rate": 1.7328969800494727e-06,
      "loss": 0.3422,
      "step": 3770
    },
    {
      "epoch": 0.0934,
      "grad_norm": 0.5595421195030212,
      "learning_rate": 1.7302553652087927e-06,
      "loss": 0.3541,
      "step": 3771
    },
    {
      "epoch": 0.0936,
      "grad_norm": 0.5049716830253601,
      "learning_rate": 1.727615344001926e-06,
      "loss": 0.3461,
      "step": 3772
    },
    {
      "epoch": 0.0938,
      "grad_norm": 0.4270356297492981,
      "learning_rate": 1.7249769177155879e-06,
      "loss": 0.337,
      "step": 3773
    },
    {
      "epoch": 0.094,
      "grad_norm": 0.493681401014328,
      "learning_rate": 1.7223400876357144e-06,
      "loss": 0.3462,
      "step": 3774
    },
    {
      "epoch": 0.0942,
      "grad_norm": 0.42094358801841736,
      "learning_rate": 1.7197048550474643e-06,
      "loss": 0.3309,
      "step": 3775
    },
    {
      "epoch": 0.0944,
      "grad_norm": 0.5600056052207947,
      "learning_rate": 1.7170712212352187e-06,
      "loss": 0.3663,
      "step": 3776
    },
    {
      "epoch": 0.0946,
      "grad_norm": 0.5078986883163452,
      "learning_rate": 1.7144391874825784e-06,
      "loss": 0.3371,
      "step": 3777
    },
    {
      "epoch": 0.0948,
      "grad_norm": 0.4972310960292816,
      "learning_rate": 1.7118087550723633e-06,
      "loss": 0.3321,
      "step": 3778
    },
    {
      "epoch": 0.095,
      "grad_norm": 0.45374760031700134,
      "learning_rate": 1.709179925286617e-06,
      "loss": 0.3434,
      "step": 3779
    },
    {
      "epoch": 0.0952,
      "grad_norm": 0.3755553364753723,
      "learning_rate": 1.7065526994065973e-06,
      "loss": 0.3175,
      "step": 3780
    },
    {
      "epoch": 0.0954,
      "grad_norm": 0.4775616526603699,
      "learning_rate": 1.7039270787127832e-06,
      "loss": 0.3151,
      "step": 3781
    },
    {
      "epoch": 0.0956,
      "grad_norm": 0.44908514618873596,
      "learning_rate": 1.7013030644848698e-06,
      "loss": 0.3316,
      "step": 3782
    },
    {
      "epoch": 0.0958,
      "grad_norm": 0.4917657971382141,
      "learning_rate": 1.6986806580017695e-06,
      "loss": 0.3213,
      "step": 3783
    },
    {
      "epoch": 0.096,
      "grad_norm": 0.9419008493423462,
      "learning_rate": 1.6960598605416117e-06,
      "loss": 0.2855,
      "step": 3784
    },
    {
      "epoch": 0.0962,
      "grad_norm": 0.43114763498306274,
      "learning_rate": 1.6934406733817417e-06,
      "loss": 0.3318,
      "step": 3785
    },
    {
      "epoch": 0.0964,
      "grad_norm": 0.41727370023727417,
      "learning_rate": 1.6908230977987184e-06,
      "loss": 0.3403,
      "step": 3786
    },
    {
      "epoch": 0.0966,
      "grad_norm": 0.4208132028579712,
      "learning_rate": 1.6882071350683165e-06,
      "loss": 0.3356,
      "step": 3787
    },
    {
      "epoch": 0.0968,
      "grad_norm": 0.4475415050983429,
      "learning_rate": 1.6855927864655241e-06,
      "loss": 0.3527,
      "step": 3788
    },
    {
      "epoch": 0.097,
      "grad_norm": 0.4144686162471771,
      "learning_rate": 1.6829800532645447e-06,
      "loss": 0.3294,
      "step": 3789
    },
    {
      "epoch": 0.0972,
      "grad_norm": 0.4472198486328125,
      "learning_rate": 1.680368936738792e-06,
      "loss": 0.33,
      "step": 3790
    },
    {
      "epoch": 0.0974,
      "grad_norm": 0.6800024509429932,
      "learning_rate": 1.6777594381608936e-06,
      "loss": 0.296,
      "step": 3791
    },
    {
      "epoch": 0.0976,
      "grad_norm": 0.4277667701244354,
      "learning_rate": 1.6751515588026828e-06,
      "loss": 0.3288,
      "step": 3792
    },
    {
      "epoch": 0.0978,
      "grad_norm": 0.5162302255630493,
      "learning_rate": 1.6725452999352137e-06,
      "loss": 0.3375,
      "step": 3793
    },
    {
      "epoch": 0.098,
      "grad_norm": 0.399065226316452,
      "learning_rate": 1.6699406628287423e-06,
      "loss": 0.2936,
      "step": 3794
    },
    {
      "epoch": 0.0982,
      "grad_norm": 0.44611600041389465,
      "learning_rate": 1.6673376487527382e-06,
      "loss": 0.3109,
      "step": 3795
    },
    {
      "epoch": 0.0984,
      "grad_norm": 0.39248165488243103,
      "learning_rate": 1.6647362589758787e-06,
      "loss": 0.373,
      "step": 3796
    },
    {
      "epoch": 0.0986,
      "grad_norm": 0.43016356229782104,
      "learning_rate": 1.6621364947660472e-06,
      "loss": 0.3451,
      "step": 3797
    },
    {
      "epoch": 0.0988,
      "grad_norm": 0.4484221935272217,
      "learning_rate": 1.6595383573903412e-06,
      "loss": 0.3081,
      "step": 3798
    },
    {
      "epoch": 0.099,
      "grad_norm": 0.5050453543663025,
      "learning_rate": 1.6569418481150596e-06,
      "loss": 0.3298,
      "step": 3799
    },
    {
      "epoch": 0.0992,
      "grad_norm": 0.397652804851532,
      "learning_rate": 1.6543469682057105e-06,
      "loss": 0.3059,
      "step": 3800
    },
    {
      "epoch": 0.0994,
      "grad_norm": 0.38370439410209656,
      "learning_rate": 1.6517537189270043e-06,
      "loss": 0.3114,
      "step": 3801
    },
    {
      "epoch": 0.0996,
      "grad_norm": 0.5816129446029663,
      "learning_rate": 1.6491621015428588e-06,
      "loss": 0.3321,
      "step": 3802
    },
    {
      "epoch": 0.0998,
      "grad_norm": 0.559210479259491,
      "learning_rate": 1.6465721173164e-06,
      "loss": 0.3012,
      "step": 3803
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4942220151424408,
      "learning_rate": 1.643983767509954e-06,
      "loss": 0.3349,
      "step": 3804
    },
    {
      "epoch": 0.1002,
      "grad_norm": 0.3832506537437439,
      "learning_rate": 1.6413970533850498e-06,
      "loss": 0.3392,
      "step": 3805
    },
    {
      "epoch": 0.1004,
      "grad_norm": 0.4040324091911316,
      "learning_rate": 1.6388119762024213e-06,
      "loss": 0.3422,
      "step": 3806
    },
    {
      "epoch": 0.1006,
      "grad_norm": 0.4903092384338379,
      "learning_rate": 1.6362285372220016e-06,
      "loss": 0.2931,
      "step": 3807
    },
    {
      "epoch": 0.1008,
      "grad_norm": 0.37180978059768677,
      "learning_rate": 1.6336467377029308e-06,
      "loss": 0.3104,
      "step": 3808
    },
    {
      "epoch": 0.101,
      "grad_norm": 0.3920961916446686,
      "learning_rate": 1.6310665789035468e-06,
      "loss": 0.3401,
      "step": 3809
    },
    {
      "epoch": 0.1012,
      "grad_norm": 0.4087616503238678,
      "learning_rate": 1.6284880620813847e-06,
      "loss": 0.2995,
      "step": 3810
    },
    {
      "epoch": 0.1014,
      "grad_norm": 0.41370514035224915,
      "learning_rate": 1.6259111884931817e-06,
      "loss": 0.3141,
      "step": 3811
    },
    {
      "epoch": 0.1016,
      "grad_norm": 0.37536126375198364,
      "learning_rate": 1.6233359593948777e-06,
      "loss": 0.3563,
      "step": 3812
    },
    {
      "epoch": 0.1018,
      "grad_norm": 0.41062864661216736,
      "learning_rate": 1.6207623760416074e-06,
      "loss": 0.3066,
      "step": 3813
    },
    {
      "epoch": 0.102,
      "grad_norm": 0.5101035237312317,
      "learning_rate": 1.6181904396877041e-06,
      "loss": 0.3558,
      "step": 3814
    },
    {
      "epoch": 0.1022,
      "grad_norm": 0.4786215126514435,
      "learning_rate": 1.6156201515866971e-06,
      "loss": 0.3372,
      "step": 3815
    },
    {
      "epoch": 0.1024,
      "grad_norm": 0.4214610159397125,
      "learning_rate": 1.6130515129913144e-06,
      "loss": 0.3333,
      "step": 3816
    },
    {
      "epoch": 0.1026,
      "grad_norm": 0.4301564693450928,
      "learning_rate": 1.6104845251534772e-06,
      "loss": 0.3309,
      "step": 3817
    },
    {
      "epoch": 0.1028,
      "grad_norm": 0.49824222922325134,
      "learning_rate": 1.6079191893243102e-06,
      "loss": 0.3389,
      "step": 3818
    },
    {
      "epoch": 0.103,
      "grad_norm": 1.3941454887390137,
      "learning_rate": 1.605355506754121e-06,
      "loss": 0.3268,
      "step": 3819
    },
    {
      "epoch": 0.1032,
      "grad_norm": 0.49321234226226807,
      "learning_rate": 1.6027934786924187e-06,
      "loss": 0.3556,
      "step": 3820
    },
    {
      "epoch": 0.1034,
      "grad_norm": 0.408451110124588,
      "learning_rate": 1.600233106387904e-06,
      "loss": 0.3193,
      "step": 3821
    },
    {
      "epoch": 0.1036,
      "grad_norm": 0.5419385433197021,
      "learning_rate": 1.597674391088474e-06,
      "loss": 0.3119,
      "step": 3822
    },
    {
      "epoch": 0.1038,
      "grad_norm": 0.4621765613555908,
      "learning_rate": 1.5951173340412134e-06,
      "loss": 0.3442,
      "step": 3823
    },
    {
      "epoch": 0.104,
      "grad_norm": 1.521638035774231,
      "learning_rate": 1.5925619364924016e-06,
      "loss": 0.3362,
      "step": 3824
    },
    {
      "epoch": 0.1042,
      "grad_norm": 0.4180125296115875,
      "learning_rate": 1.5900081996875083e-06,
      "loss": 0.347,
      "step": 3825
    },
    {
      "epoch": 0.1044,
      "grad_norm": 0.38872286677360535,
      "learning_rate": 1.587456124871191e-06,
      "loss": 0.3042,
      "step": 3826
    },
    {
      "epoch": 0.1046,
      "grad_norm": 0.4716740846633911,
      "learning_rate": 1.5849057132873063e-06,
      "loss": 0.3045,
      "step": 3827
    },
    {
      "epoch": 0.1048,
      "grad_norm": 0.46413469314575195,
      "learning_rate": 1.582356966178888e-06,
      "loss": 0.3208,
      "step": 3828
    },
    {
      "epoch": 0.105,
      "grad_norm": 0.6142104864120483,
      "learning_rate": 1.5798098847881664e-06,
      "loss": 0.3162,
      "step": 3829
    },
    {
      "epoch": 0.1052,
      "grad_norm": 0.42785486578941345,
      "learning_rate": 1.5772644703565564e-06,
      "loss": 0.3416,
      "step": 3830
    },
    {
      "epoch": 0.1054,
      "grad_norm": 0.48842552304267883,
      "learning_rate": 1.5747207241246654e-06,
      "loss": 0.3502,
      "step": 3831
    },
    {
      "epoch": 0.1056,
      "grad_norm": 0.4118221700191498,
      "learning_rate": 1.5721786473322825e-06,
      "loss": 0.3498,
      "step": 3832
    },
    {
      "epoch": 0.1058,
      "grad_norm": 1.281907081604004,
      "learning_rate": 1.5696382412183853e-06,
      "loss": 0.3439,
      "step": 3833
    },
    {
      "epoch": 0.106,
      "grad_norm": 0.4824022054672241,
      "learning_rate": 1.567099507021137e-06,
      "loss": 0.3836,
      "step": 3834
    },
    {
      "epoch": 0.1062,
      "grad_norm": 0.4940249025821686,
      "learning_rate": 1.5645624459778858e-06,
      "loss": 0.3479,
      "step": 3835
    },
    {
      "epoch": 0.1064,
      "grad_norm": 0.5029503107070923,
      "learning_rate": 1.5620270593251635e-06,
      "loss": 0.3399,
      "step": 3836
    },
    {
      "epoch": 0.1066,
      "grad_norm": 0.4563252031803131,
      "learning_rate": 1.5594933482986885e-06,
      "loss": 0.3218,
      "step": 3837
    },
    {
      "epoch": 0.1068,
      "grad_norm": 0.5320325493812561,
      "learning_rate": 1.556961314133359e-06,
      "loss": 0.3369,
      "step": 3838
    },
    {
      "epoch": 0.107,
      "grad_norm": 0.42079779505729675,
      "learning_rate": 1.554430958063259e-06,
      "loss": 0.3194,
      "step": 3839
    },
    {
      "epoch": 0.1072,
      "grad_norm": 0.42260754108428955,
      "learning_rate": 1.551902281321651e-06,
      "loss": 0.3351,
      "step": 3840
    },
    {
      "epoch": 0.1074,
      "grad_norm": 0.44992801547050476,
      "learning_rate": 1.5493752851409844e-06,
      "loss": 0.3471,
      "step": 3841
    },
    {
      "epoch": 0.1076,
      "grad_norm": 0.7414987683296204,
      "learning_rate": 1.5468499707528856e-06,
      "loss": 0.3705,
      "step": 3842
    },
    {
      "epoch": 0.1078,
      "grad_norm": 0.4166577160358429,
      "learning_rate": 1.5443263393881619e-06,
      "loss": 0.3209,
      "step": 3843
    },
    {
      "epoch": 0.108,
      "grad_norm": 0.5210364460945129,
      "learning_rate": 1.5418043922768e-06,
      "loss": 0.3828,
      "step": 3844
    },
    {
      "epoch": 0.1082,
      "grad_norm": 0.4479031264781952,
      "learning_rate": 1.5392841306479667e-06,
      "loss": 0.3469,
      "step": 3845
    },
    {
      "epoch": 0.1084,
      "grad_norm": 0.4732777774333954,
      "learning_rate": 1.5367655557300066e-06,
      "loss": 0.3589,
      "step": 3846
    },
    {
      "epoch": 0.1086,
      "grad_norm": 0.43565231561660767,
      "learning_rate": 1.5342486687504432e-06,
      "loss": 0.3107,
      "step": 3847
    },
    {
      "epoch": 0.1088,
      "grad_norm": 0.5571365356445312,
      "learning_rate": 1.531733470935976e-06,
      "loss": 0.3093,
      "step": 3848
    },
    {
      "epoch": 0.109,
      "grad_norm": 0.48648861050605774,
      "learning_rate": 1.529219963512481e-06,
      "loss": 0.3288,
      "step": 3849
    },
    {
      "epoch": 0.1092,
      "grad_norm": 0.799126923084259,
      "learning_rate": 1.5267081477050132e-06,
      "loss": 0.3084,
      "step": 3850
    },
    {
      "epoch": 0.1094,
      "grad_norm": 0.41421374678611755,
      "learning_rate": 1.5241980247378008e-06,
      "loss": 0.3271,
      "step": 3851
    },
    {
      "epoch": 0.1096,
      "grad_norm": 0.4779883623123169,
      "learning_rate": 1.521689595834246e-06,
      "loss": 0.3401,
      "step": 3852
    },
    {
      "epoch": 0.1098,
      "grad_norm": 0.656024694442749,
      "learning_rate": 1.519182862216929e-06,
      "loss": 0.3401,
      "step": 3853
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.46425992250442505,
      "learning_rate": 1.5166778251075964e-06,
      "loss": 0.3039,
      "step": 3854
    },
    {
      "epoch": 0.1102,
      "grad_norm": 0.38336482644081116,
      "learning_rate": 1.514174485727178e-06,
      "loss": 0.306,
      "step": 3855
    },
    {
      "epoch": 0.1104,
      "grad_norm": 0.43700969219207764,
      "learning_rate": 1.5116728452957686e-06,
      "loss": 0.3179,
      "step": 3856
    },
    {
      "epoch": 0.1106,
      "grad_norm": 0.557547390460968,
      "learning_rate": 1.5091729050326376e-06,
      "loss": 0.2994,
      "step": 3857
    },
    {
      "epoch": 0.1108,
      "grad_norm": 0.49241185188293457,
      "learning_rate": 1.5066746661562254e-06,
      "loss": 0.3199,
      "step": 3858
    },
    {
      "epoch": 0.111,
      "grad_norm": 0.4140123426914215,
      "learning_rate": 1.5041781298841424e-06,
      "loss": 0.333,
      "step": 3859
    },
    {
      "epoch": 0.1112,
      "grad_norm": 0.5885708332061768,
      "learning_rate": 1.5016832974331725e-06,
      "loss": 0.33,
      "step": 3860
    },
    {
      "epoch": 0.1114,
      "grad_norm": 0.7350049614906311,
      "learning_rate": 1.4991901700192657e-06,
      "loss": 0.3431,
      "step": 3861
    },
    {
      "epoch": 0.1116,
      "grad_norm": 0.5206356644630432,
      "learning_rate": 1.496698748857543e-06,
      "loss": 0.3168,
      "step": 3862
    },
    {
      "epoch": 0.1118,
      "grad_norm": 0.5180819034576416,
      "learning_rate": 1.4942090351622884e-06,
      "loss": 0.3246,
      "step": 3863
    },
    {
      "epoch": 0.112,
      "grad_norm": 0.35309529304504395,
      "learning_rate": 1.491721030146963e-06,
      "loss": 0.318,
      "step": 3864
    },
    {
      "epoch": 0.1122,
      "grad_norm": 0.46707502007484436,
      "learning_rate": 1.489234735024188e-06,
      "loss": 0.3489,
      "step": 3865
    },
    {
      "epoch": 0.1124,
      "grad_norm": 0.41357147693634033,
      "learning_rate": 1.4867501510057548e-06,
      "loss": 0.3008,
      "step": 3866
    },
    {
      "epoch": 0.1126,
      "grad_norm": 0.5301222801208496,
      "learning_rate": 1.484267279302618e-06,
      "loss": 0.3461,
      "step": 3867
    },
    {
      "epoch": 0.1128,
      "grad_norm": 0.5098255276679993,
      "learning_rate": 1.4817861211248996e-06,
      "loss": 0.3696,
      "step": 3868
    },
    {
      "epoch": 0.113,
      "grad_norm": 0.4044581651687622,
      "learning_rate": 1.4793066776818843e-06,
      "loss": 0.3123,
      "step": 3869
    },
    {
      "epoch": 0.1132,
      "grad_norm": 0.8215331435203552,
      "learning_rate": 1.4768289501820265e-06,
      "loss": 0.3427,
      "step": 3870
    },
    {
      "epoch": 0.1134,
      "grad_norm": 0.5466445088386536,
      "learning_rate": 1.4743529398329393e-06,
      "loss": 0.3338,
      "step": 3871
    },
    {
      "epoch": 0.1136,
      "grad_norm": 0.5918053984642029,
      "learning_rate": 1.4718786478413983e-06,
      "loss": 0.3664,
      "step": 3872
    },
    {
      "epoch": 0.1138,
      "grad_norm": 0.5222645401954651,
      "learning_rate": 1.469406075413342e-06,
      "loss": 0.3384,
      "step": 3873
    },
    {
      "epoch": 0.114,
      "grad_norm": 0.45238766074180603,
      "learning_rate": 1.4669352237538763e-06,
      "loss": 0.3436,
      "step": 3874
    },
    {
      "epoch": 0.1142,
      "grad_norm": 0.6149289011955261,
      "learning_rate": 1.4644660940672628e-06,
      "loss": 0.312,
      "step": 3875
    },
    {
      "epoch": 0.1144,
      "grad_norm": 0.5145707130432129,
      "learning_rate": 1.4619986875569247e-06,
      "loss": 0.3525,
      "step": 3876
    },
    {
      "epoch": 0.1146,
      "grad_norm": 0.5789161920547485,
      "learning_rate": 1.459533005425446e-06,
      "loss": 0.349,
      "step": 3877
    },
    {
      "epoch": 0.1148,
      "grad_norm": 0.46727511286735535,
      "learning_rate": 1.4570690488745687e-06,
      "loss": 0.3104,
      "step": 3878
    },
    {
      "epoch": 0.115,
      "grad_norm": 0.4135199189186096,
      "learning_rate": 1.4546068191051988e-06,
      "loss": 0.3213,
      "step": 3879
    },
    {
      "epoch": 0.1152,
      "grad_norm": 0.4889047145843506,
      "learning_rate": 1.4521463173173966e-06,
      "loss": 0.3856,
      "step": 3880
    },
    {
      "epoch": 0.1154,
      "grad_norm": 0.41762521862983704,
      "learning_rate": 1.4496875447103781e-06,
      "loss": 0.3315,
      "step": 3881
    },
    {
      "epoch": 0.1156,
      "grad_norm": 0.45093271136283875,
      "learning_rate": 1.4472305024825189e-06,
      "loss": 0.3018,
      "step": 3882
    },
    {
      "epoch": 0.1158,
      "grad_norm": 0.4077936112880707,
      "learning_rate": 1.4447751918313552e-06,
      "loss": 0.3515,
      "step": 3883
    },
    {
      "epoch": 0.116,
      "grad_norm": 0.6948106288909912,
      "learning_rate": 1.4423216139535735e-06,
      "loss": 0.3011,
      "step": 3884
    },
    {
      "epoch": 0.1162,
      "grad_norm": 0.628559410572052,
      "learning_rate": 1.4398697700450181e-06,
      "loss": 0.3308,
      "step": 3885
    },
    {
      "epoch": 0.1164,
      "grad_norm": 0.5221124291419983,
      "learning_rate": 1.4374196613006874e-06,
      "loss": 0.3541,
      "step": 3886
    },
    {
      "epoch": 0.1166,
      "grad_norm": 0.5455564856529236,
      "learning_rate": 1.4349712889147355e-06,
      "loss": 0.3633,
      "step": 3887
    },
    {
      "epoch": 0.1168,
      "grad_norm": 0.42857491970062256,
      "learning_rate": 1.4325246540804672e-06,
      "loss": 0.3473,
      "step": 3888
    },
    {
      "epoch": 0.117,
      "grad_norm": 0.5200515985488892,
      "learning_rate": 1.4300797579903476e-06,
      "loss": 0.3247,
      "step": 3889
    },
    {
      "epoch": 0.1172,
      "grad_norm": 0.3637600839138031,
      "learning_rate": 1.4276366018359845e-06,
      "loss": 0.2993,
      "step": 3890
    },
    {
      "epoch": 0.1174,
      "grad_norm": 0.39659935235977173,
      "learning_rate": 1.4251951868081438e-06,
      "loss": 0.3074,
      "step": 3891
    },
    {
      "epoch": 0.1176,
      "grad_norm": 0.39437687397003174,
      "learning_rate": 1.4227555140967402e-06,
      "loss": 0.3383,
      "step": 3892
    },
    {
      "epoch": 0.1178,
      "grad_norm": 0.3784373104572296,
      "learning_rate": 1.420317584890844e-06,
      "loss": 0.3147,
      "step": 3893
    },
    {
      "epoch": 0.118,
      "grad_norm": 0.4420725703239441,
      "learning_rate": 1.4178814003786706e-06,
      "loss": 0.3615,
      "step": 3894
    },
    {
      "epoch": 0.1182,
      "grad_norm": 0.4249776303768158,
      "learning_rate": 1.4154469617475864e-06,
      "loss": 0.3214,
      "step": 3895
    },
    {
      "epoch": 0.1184,
      "grad_norm": 0.43979597091674805,
      "learning_rate": 1.4130142701841076e-06,
      "loss": 0.3064,
      "step": 3896
    },
    {
      "epoch": 0.1186,
      "grad_norm": 0.41127029061317444,
      "learning_rate": 1.4105833268738966e-06,
      "loss": 0.2981,
      "step": 3897
    },
    {
      "epoch": 0.1188,
      "grad_norm": 0.42960450053215027,
      "learning_rate": 1.4081541330017706e-06,
      "loss": 0.3109,
      "step": 3898
    },
    {
      "epoch": 0.119,
      "grad_norm": 0.4343511462211609,
      "learning_rate": 1.4057266897516842e-06,
      "loss": 0.3364,
      "step": 3899
    },
    {
      "epoch": 0.1192,
      "grad_norm": 0.47651007771492004,
      "learning_rate": 1.4033009983067454e-06,
      "loss": 0.3414,
      "step": 3900
    },
    {
      "epoch": 0.1194,
      "grad_norm": 0.404897004365921,
      "learning_rate": 1.4008770598492072e-06,
      "loss": 0.3134,
      "step": 3901
    },
    {
      "epoch": 0.1196,
      "grad_norm": 0.46442610025405884,
      "learning_rate": 1.3984548755604655e-06,
      "loss": 0.3138,
      "step": 3902
    },
    {
      "epoch": 0.1198,
      "grad_norm": 0.4677254259586334,
      "learning_rate": 1.3960344466210669e-06,
      "loss": 0.3206,
      "step": 3903
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.4217228293418884,
      "learning_rate": 1.3936157742106977e-06,
      "loss": 0.3509,
      "step": 3904
    },
    {
      "epoch": 0.1202,
      "grad_norm": 0.5240967869758606,
      "learning_rate": 1.3911988595081894e-06,
      "loss": 0.3318,
      "step": 3905
    },
    {
      "epoch": 0.1204,
      "grad_norm": 0.49380263686180115,
      "learning_rate": 1.3887837036915169e-06,
      "loss": 0.326,
      "step": 3906
    },
    {
      "epoch": 0.1206,
      "grad_norm": 0.5374504327774048,
      "learning_rate": 1.3863703079377971e-06,
      "loss": 0.3234,
      "step": 3907
    },
    {
      "epoch": 0.1208,
      "grad_norm": 0.4033348262310028,
      "learning_rate": 1.3839586734232907e-06,
      "loss": 0.3303,
      "step": 3908
    },
    {
      "epoch": 0.121,
      "grad_norm": 0.4741966724395752,
      "learning_rate": 1.3815488013233986e-06,
      "loss": 0.3379,
      "step": 3909
    },
    {
      "epoch": 0.1212,
      "grad_norm": 2.6765551567077637,
      "learning_rate": 1.3791406928126638e-06,
      "loss": 0.3229,
      "step": 3910
    },
    {
      "epoch": 0.1214,
      "grad_norm": 0.4746617376804352,
      "learning_rate": 1.3767343490647668e-06,
      "loss": 0.3186,
      "step": 3911
    },
    {
      "epoch": 0.1216,
      "grad_norm": 0.6164945960044861,
      "learning_rate": 1.3743297712525334e-06,
      "loss": 0.3214,
      "step": 3912
    },
    {
      "epoch": 0.1218,
      "grad_norm": 0.6686504483222961,
      "learning_rate": 1.3719269605479241e-06,
      "loss": 0.3062,
      "step": 3913
    },
    {
      "epoch": 0.122,
      "grad_norm": 0.4040912687778473,
      "learning_rate": 1.3695259181220405e-06,
      "loss": 0.3322,
      "step": 3914
    },
    {
      "epoch": 0.1222,
      "grad_norm": 0.40552645921707153,
      "learning_rate": 1.3671266451451209e-06,
      "loss": 0.3465,
      "step": 3915
    },
    {
      "epoch": 0.1224,
      "grad_norm": 0.430776983499527,
      "learning_rate": 1.3647291427865417e-06,
      "loss": 0.3474,
      "step": 3916
    },
    {
      "epoch": 0.1226,
      "grad_norm": 0.4674695134162903,
      "learning_rate": 1.3623334122148164e-06,
      "loss": 0.341,
      "step": 3917
    },
    {
      "epoch": 0.1228,
      "grad_norm": 0.36589354276657104,
      "learning_rate": 1.3599394545975952e-06,
      "loss": 0.3138,
      "step": 3918
    },
    {
      "epoch": 0.123,
      "grad_norm": 0.49210667610168457,
      "learning_rate": 1.3575472711016634e-06,
      "loss": 0.3216,
      "step": 3919
    },
    {
      "epoch": 0.1232,
      "grad_norm": 0.425616979598999,
      "learning_rate": 1.3551568628929434e-06,
      "loss": 0.3349,
      "step": 3920
    },
    {
      "epoch": 0.1234,
      "grad_norm": 0.45165467262268066,
      "learning_rate": 1.3527682311364886e-06,
      "loss": 0.327,
      "step": 3921
    },
    {
      "epoch": 0.1236,
      "grad_norm": 0.4631829857826233,
      "learning_rate": 1.3503813769964923e-06,
      "loss": 0.3158,
      "step": 3922
    },
    {
      "epoch": 0.1238,
      "grad_norm": 0.432025671005249,
      "learning_rate": 1.3479963016362768e-06,
      "loss": 0.3186,
      "step": 3923
    },
    {
      "epoch": 0.124,
      "grad_norm": 0.5064100027084351,
      "learning_rate": 1.3456130062183003e-06,
      "loss": 0.3356,
      "step": 3924
    },
    {
      "epoch": 0.1242,
      "grad_norm": 0.42121708393096924,
      "learning_rate": 1.3432314919041478e-06,
      "loss": 0.3563,
      "step": 3925
    },
    {
      "epoch": 0.1244,
      "grad_norm": 0.48816683888435364,
      "learning_rate": 1.3408517598545446e-06,
      "loss": 0.3112,
      "step": 3926
    },
    {
      "epoch": 0.1246,
      "grad_norm": 0.49925246834754944,
      "learning_rate": 1.3384738112293415e-06,
      "loss": 0.3269,
      "step": 3927
    },
    {
      "epoch": 0.1248,
      "grad_norm": 0.698678731918335,
      "learning_rate": 1.3360976471875226e-06,
      "loss": 0.3457,
      "step": 3928
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.6371123790740967,
      "learning_rate": 1.333723268887201e-06,
      "loss": 0.3042,
      "step": 3929
    },
    {
      "epoch": 0.1252,
      "grad_norm": 0.45761173963546753,
      "learning_rate": 1.3313506774856177e-06,
      "loss": 0.3512,
      "step": 3930
    },
    {
      "epoch": 0.1254,
      "grad_norm": 0.5153129696846008,
      "learning_rate": 1.3289798741391486e-06,
      "loss": 0.3417,
      "step": 3931
    },
    {
      "epoch": 0.1256,
      "grad_norm": 0.7276511192321777,
      "learning_rate": 1.3266108600032928e-06,
      "loss": 0.3392,
      "step": 3932
    },
    {
      "epoch": 0.1258,
      "grad_norm": 0.5264747142791748,
      "learning_rate": 1.3242436362326804e-06,
      "loss": 0.3287,
      "step": 3933
    },
    {
      "epoch": 0.126,
      "grad_norm": 0.4628417193889618,
      "learning_rate": 1.3218782039810634e-06,
      "loss": 0.3738,
      "step": 3934
    },
    {
      "epoch": 0.1262,
      "grad_norm": 0.4905399680137634,
      "learning_rate": 1.3195145644013286e-06,
      "loss": 0.3195,
      "step": 3935
    },
    {
      "epoch": 0.1264,
      "grad_norm": 0.3977375328540802,
      "learning_rate": 1.317152718645484e-06,
      "loss": 0.3161,
      "step": 3936
    },
    {
      "epoch": 0.1266,
      "grad_norm": 0.4387080669403076,
      "learning_rate": 1.314792667864665e-06,
      "loss": 0.3547,
      "step": 3937
    },
    {
      "epoch": 0.1268,
      "grad_norm": 0.4310533404350281,
      "learning_rate": 1.312434413209131e-06,
      "loss": 0.3073,
      "step": 3938
    },
    {
      "epoch": 0.127,
      "grad_norm": 0.40620148181915283,
      "learning_rate": 1.3100779558282673e-06,
      "loss": 0.337,
      "step": 3939
    },
    {
      "epoch": 0.1272,
      "grad_norm": 0.4161722660064697,
      "learning_rate": 1.3077232968705805e-06,
      "loss": 0.3566,
      "step": 3940
    },
    {
      "epoch": 0.1274,
      "grad_norm": 0.4758037030696869,
      "learning_rate": 1.3053704374837063e-06,
      "loss": 0.3386,
      "step": 3941
    },
    {
      "epoch": 0.1276,
      "grad_norm": 0.67817223072052,
      "learning_rate": 1.3030193788143991e-06,
      "loss": 0.3024,
      "step": 3942
    },
    {
      "epoch": 0.1278,
      "grad_norm": 0.5044216513633728,
      "learning_rate": 1.3006701220085338e-06,
      "loss": 0.3774,
      "step": 3943
    },
    {
      "epoch": 0.128,
      "grad_norm": 0.909024715423584,
      "learning_rate": 1.2983226682111094e-06,
      "loss": 0.3575,
      "step": 3944
    },
    {
      "epoch": 0.1282,
      "grad_norm": 0.4636319577693939,
      "learning_rate": 1.2959770185662502e-06,
      "loss": 0.3231,
      "step": 3945
    },
    {
      "epoch": 0.1284,
      "grad_norm": 0.42164215445518494,
      "learning_rate": 1.2936331742171943e-06,
      "loss": 0.3,
      "step": 3946
    },
    {
      "epoch": 0.1286,
      "grad_norm": 0.4715688228607178,
      "learning_rate": 1.2912911363063048e-06,
      "loss": 0.36,
      "step": 3947
    },
    {
      "epoch": 0.1288,
      "grad_norm": 0.4709605276584625,
      "learning_rate": 1.2889509059750605e-06,
      "loss": 0.3199,
      "step": 3948
    },
    {
      "epoch": 0.129,
      "grad_norm": 0.607223391532898,
      "learning_rate": 1.2866124843640614e-06,
      "loss": 0.3089,
      "step": 3949
    },
    {
      "epoch": 0.1292,
      "grad_norm": 0.4074137806892395,
      "learning_rate": 1.2842758726130283e-06,
      "loss": 0.3234,
      "step": 3950
    },
    {
      "epoch": 0.1294,
      "grad_norm": 0.42905011773109436,
      "learning_rate": 1.2819410718607972e-06,
      "loss": 0.3561,
      "step": 3951
    },
    {
      "epoch": 0.1296,
      "grad_norm": 0.5114502906799316,
      "learning_rate": 1.2796080832453183e-06,
      "loss": 0.3537,
      "step": 3952
    },
    {
      "epoch": 0.1298,
      "grad_norm": 0.48266279697418213,
      "learning_rate": 1.2772769079036639e-06,
      "loss": 0.336,
      "step": 3953
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5068391561508179,
      "learning_rate": 1.2749475469720196e-06,
      "loss": 0.3281,
      "step": 3954
    },
    {
      "epoch": 0.1302,
      "grad_norm": 0.3870641887187958,
      "learning_rate": 1.2726200015856893e-06,
      "loss": 0.3322,
      "step": 3955
    },
    {
      "epoch": 0.1304,
      "grad_norm": 0.4616537094116211,
      "learning_rate": 1.2702942728790897e-06,
      "loss": 0.3337,
      "step": 3956
    },
    {
      "epoch": 0.1306,
      "grad_norm": 0.4034566879272461,
      "learning_rate": 1.2679703619857525e-06,
      "loss": 0.3445,
      "step": 3957
    },
    {
      "epoch": 0.1308,
      "grad_norm": 0.37781453132629395,
      "learning_rate": 1.2656482700383238e-06,
      "loss": 0.317,
      "step": 3958
    },
    {
      "epoch": 0.131,
      "grad_norm": 0.45045000314712524,
      "learning_rate": 1.2633279981685608e-06,
      "loss": 0.3113,
      "step": 3959
    },
    {
      "epoch": 0.1312,
      "grad_norm": 0.4577398896217346,
      "learning_rate": 1.2610095475073415e-06,
      "loss": 0.3432,
      "step": 3960
    },
    {
      "epoch": 0.1314,
      "grad_norm": 0.4596940279006958,
      "learning_rate": 1.2586929191846453e-06,
      "loss": 0.3738,
      "step": 3961
    },
    {
      "epoch": 0.1316,
      "grad_norm": 0.42095568776130676,
      "learning_rate": 1.2563781143295705e-06,
      "loss": 0.3466,
      "step": 3962
    },
    {
      "epoch": 0.1318,
      "grad_norm": 0.43722590804100037,
      "learning_rate": 1.2540651340703231e-06,
      "loss": 0.3188,
      "step": 3963
    },
    {
      "epoch": 0.132,
      "grad_norm": 0.3839768171310425,
      "learning_rate": 1.2517539795342248e-06,
      "loss": 0.3206,
      "step": 3964
    },
    {
      "epoch": 0.1322,
      "grad_norm": 0.39017385244369507,
      "learning_rate": 1.2494446518477022e-06,
      "loss": 0.3169,
      "step": 3965
    },
    {
      "epoch": 0.1324,
      "grad_norm": 0.4439059793949127,
      "learning_rate": 1.2471371521362946e-06,
      "loss": 0.304,
      "step": 3966
    },
    {
      "epoch": 0.1326,
      "grad_norm": 0.3890194296836853,
      "learning_rate": 1.2448314815246487e-06,
      "loss": 0.3216,
      "step": 3967
    },
    {
      "epoch": 0.1328,
      "grad_norm": 0.4674232006072998,
      "learning_rate": 1.24252764113652e-06,
      "loss": 0.3379,
      "step": 3968
    },
    {
      "epoch": 0.133,
      "grad_norm": 0.4335528612136841,
      "learning_rate": 1.240225632094773e-06,
      "loss": 0.3362,
      "step": 3969
    },
    {
      "epoch": 0.1332,
      "grad_norm": 0.5320464968681335,
      "learning_rate": 1.2379254555213788e-06,
      "loss": 0.3478,
      "step": 3970
    },
    {
      "epoch": 0.1334,
      "grad_norm": 0.4751874506473541,
      "learning_rate": 1.2356271125374153e-06,
      "loss": 0.3346,
      "step": 3971
    },
    {
      "epoch": 0.1336,
      "grad_norm": 0.5928405523300171,
      "learning_rate": 1.2333306042630672e-06,
      "loss": 0.3434,
      "step": 3972
    },
    {
      "epoch": 0.1338,
      "grad_norm": 0.45560845732688904,
      "learning_rate": 1.2310359318176229e-06,
      "loss": 0.3114,
      "step": 3973
    },
    {
      "epoch": 0.134,
      "grad_norm": 0.5022554397583008,
      "learning_rate": 1.2287430963194807e-06,
      "loss": 0.3085,
      "step": 3974
    },
    {
      "epoch": 0.1342,
      "grad_norm": 0.4856623411178589,
      "learning_rate": 1.22645209888614e-06,
      "loss": 0.3128,
      "step": 3975
    },
    {
      "epoch": 0.1344,
      "grad_norm": 0.4814973771572113,
      "learning_rate": 1.2241629406342048e-06,
      "loss": 0.3341,
      "step": 3976
    },
    {
      "epoch": 0.1346,
      "grad_norm": 0.49088525772094727,
      "learning_rate": 1.2218756226793827e-06,
      "loss": 0.3418,
      "step": 3977
    },
    {
      "epoch": 0.1348,
      "grad_norm": 0.4003295600414276,
      "learning_rate": 1.2195901461364851e-06,
      "loss": 0.3174,
      "step": 3978
    },
    {
      "epoch": 0.135,
      "grad_norm": 0.4521643817424774,
      "learning_rate": 1.217306512119425e-06,
      "loss": 0.3011,
      "step": 3979
    },
    {
      "epoch": 0.1352,
      "grad_norm": 0.46749427914619446,
      "learning_rate": 1.2150247217412186e-06,
      "loss": 0.3575,
      "step": 3980
    },
    {
      "epoch": 0.1354,
      "grad_norm": 0.5501726269721985,
      "learning_rate": 1.2127447761139821e-06,
      "loss": 0.3348,
      "step": 3981
    },
    {
      "epoch": 0.1356,
      "grad_norm": 0.5494904518127441,
      "learning_rate": 1.2104666763489326e-06,
      "loss": 0.3424,
      "step": 3982
    },
    {
      "epoch": 0.1358,
      "grad_norm": 0.44534894824028015,
      "learning_rate": 1.2081904235563908e-06,
      "loss": 0.322,
      "step": 3983
    },
    {
      "epoch": 0.136,
      "grad_norm": 0.532357931137085,
      "learning_rate": 1.2059160188457724e-06,
      "loss": 0.3519,
      "step": 3984
    },
    {
      "epoch": 0.1362,
      "grad_norm": 0.4871266484260559,
      "learning_rate": 1.203643463325596e-06,
      "loss": 0.3439,
      "step": 3985
    },
    {
      "epoch": 0.1364,
      "grad_norm": 0.3549700379371643,
      "learning_rate": 1.2013727581034783e-06,
      "loss": 0.2964,
      "step": 3986
    },
    {
      "epoch": 0.1366,
      "grad_norm": 0.4282408356666565,
      "learning_rate": 1.199103904286129e-06,
      "loss": 0.3279,
      "step": 3987
    },
    {
      "epoch": 0.1368,
      "grad_norm": 0.4133321940898895,
      "learning_rate": 1.1968369029793642e-06,
      "loss": 0.3414,
      "step": 3988
    },
    {
      "epoch": 0.137,
      "grad_norm": 0.45225781202316284,
      "learning_rate": 1.1945717552880919e-06,
      "loss": 0.2963,
      "step": 3989
    },
    {
      "epoch": 0.1372,
      "grad_norm": 0.3876187801361084,
      "learning_rate": 1.1923084623163172e-06,
      "loss": 0.3056,
      "step": 3990
    },
    {
      "epoch": 0.1374,
      "grad_norm": 0.6308560967445374,
      "learning_rate": 1.1900470251671415e-06,
      "loss": 0.3236,
      "step": 3991
    },
    {
      "epoch": 0.1376,
      "grad_norm": 0.4104933738708496,
      "learning_rate": 1.18778744494276e-06,
      "loss": 0.3306,
      "step": 3992
    },
    {
      "epoch": 0.1378,
      "grad_norm": 0.5525667071342468,
      "learning_rate": 1.185529722744469e-06,
      "loss": 0.3555,
      "step": 3993
    },
    {
      "epoch": 0.138,
      "grad_norm": 0.48480719327926636,
      "learning_rate": 1.1832738596726518e-06,
      "loss": 0.359,
      "step": 3994
    },
    {
      "epoch": 0.1382,
      "grad_norm": 0.41685691475868225,
      "learning_rate": 1.1810198568267906e-06,
      "loss": 0.3416,
      "step": 3995
    },
    {
      "epoch": 0.1384,
      "grad_norm": 0.5363840460777283,
      "learning_rate": 1.178767715305455e-06,
      "loss": 0.3607,
      "step": 3996
    },
    {
      "epoch": 0.1386,
      "grad_norm": 0.46700796484947205,
      "learning_rate": 1.1765174362063152e-06,
      "loss": 0.2975,
      "step": 3997
    },
    {
      "epoch": 0.1388,
      "grad_norm": 0.531520426273346,
      "learning_rate": 1.1742690206261293e-06,
      "loss": 0.3435,
      "step": 3998
    },
    {
      "epoch": 0.139,
      "grad_norm": 0.6426631808280945,
      "learning_rate": 1.1720224696607474e-06,
      "loss": 0.3036,
      "step": 3999
    },
    {
      "epoch": 0.1392,
      "grad_norm": 0.4834071099758148,
      "learning_rate": 1.1697777844051105e-06,
      "loss": 0.3181,
      "step": 4000
    },
    {
      "epoch": 0.1394,
      "grad_norm": 0.5063309073448181,
      "learning_rate": 1.1675349659532514e-06,
      "loss": 0.3315,
      "step": 4001
    },
    {
      "epoch": 0.1396,
      "grad_norm": 0.40673768520355225,
      "learning_rate": 1.1652940153982917e-06,
      "loss": 0.3343,
      "step": 4002
    },
    {
      "epoch": 0.1398,
      "grad_norm": 0.36498957872390747,
      "learning_rate": 1.1630549338324454e-06,
      "loss": 0.3089,
      "step": 4003
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.513603687286377,
      "learning_rate": 1.160817722347014e-06,
      "loss": 0.3586,
      "step": 4004
    },
    {
      "epoch": 0.1402,
      "grad_norm": 0.5648996829986572,
      "learning_rate": 1.1585823820323845e-06,
      "loss": 0.3075,
      "step": 4005
    },
    {
      "epoch": 0.1404,
      "grad_norm": 0.575992226600647,
      "learning_rate": 1.1563489139780344e-06,
      "loss": 0.3434,
      "step": 4006
    },
    {
      "epoch": 0.1406,
      "grad_norm": 0.39495763182640076,
      "learning_rate": 1.154117319272532e-06,
      "loss": 0.3356,
      "step": 4007
    },
    {
      "epoch": 0.1408,
      "grad_norm": 0.41248267889022827,
      "learning_rate": 1.1518875990035278e-06,
      "loss": 0.349,
      "step": 4008
    },
    {
      "epoch": 0.141,
      "grad_norm": 0.37093400955200195,
      "learning_rate": 1.1496597542577603e-06,
      "loss": 0.296,
      "step": 4009
    },
    {
      "epoch": 0.1412,
      "grad_norm": 0.4594757556915283,
      "learning_rate": 1.1474337861210543e-06,
      "loss": 0.3421,
      "step": 4010
    },
    {
      "epoch": 0.1414,
      "grad_norm": 0.4270641803741455,
      "learning_rate": 1.1452096956783181e-06,
      "loss": 0.3248,
      "step": 4011
    },
    {
      "epoch": 0.1416,
      "grad_norm": 0.4724912643432617,
      "learning_rate": 1.1429874840135492e-06,
      "loss": 0.3497,
      "step": 4012
    },
    {
      "epoch": 0.1418,
      "grad_norm": 0.5714061260223389,
      "learning_rate": 1.1407671522098262e-06,
      "loss": 0.3608,
      "step": 4013
    },
    {
      "epoch": 0.142,
      "grad_norm": 0.4346083402633667,
      "learning_rate": 1.1385487013493095e-06,
      "loss": 0.3134,
      "step": 4014
    },
    {
      "epoch": 0.1422,
      "grad_norm": 0.38725656270980835,
      "learning_rate": 1.136332132513245e-06,
      "loss": 0.3168,
      "step": 4015
    },
    {
      "epoch": 0.1424,
      "grad_norm": 0.38934317231178284,
      "learning_rate": 1.1341174467819637e-06,
      "loss": 0.3192,
      "step": 4016
    },
    {
      "epoch": 0.1426,
      "grad_norm": 0.4154492914676666,
      "learning_rate": 1.1319046452348758e-06,
      "loss": 0.3241,
      "step": 4017
    },
    {
      "epoch": 0.1428,
      "grad_norm": 0.4466599225997925,
      "learning_rate": 1.129693728950474e-06,
      "loss": 0.3255,
      "step": 4018
    },
    {
      "epoch": 0.143,
      "grad_norm": 0.47563767433166504,
      "learning_rate": 1.1274846990063314e-06,
      "loss": 0.3142,
      "step": 4019
    },
    {
      "epoch": 0.1432,
      "grad_norm": 0.755926787853241,
      "learning_rate": 1.1252775564791023e-06,
      "loss": 0.3342,
      "step": 4020
    },
    {
      "epoch": 0.1434,
      "grad_norm": 0.471413254737854,
      "learning_rate": 1.1230723024445212e-06,
      "loss": 0.328,
      "step": 4021
    },
    {
      "epoch": 0.1436,
      "grad_norm": 0.4472315311431885,
      "learning_rate": 1.120868937977404e-06,
      "loss": 0.3369,
      "step": 4022
    },
    {
      "epoch": 0.1438,
      "grad_norm": 0.38072383403778076,
      "learning_rate": 1.1186674641516415e-06,
      "loss": 0.3401,
      "step": 4023
    },
    {
      "epoch": 0.144,
      "grad_norm": 0.4435025155544281,
      "learning_rate": 1.1164678820402059e-06,
      "loss": 0.3187,
      "step": 4024
    },
    {
      "epoch": 0.1442,
      "grad_norm": 0.6081720590591431,
      "learning_rate": 1.1142701927151456e-06,
      "loss": 0.356,
      "step": 4025
    },
    {
      "epoch": 0.1444,
      "grad_norm": 0.3921774923801422,
      "learning_rate": 1.11207439724759e-06,
      "loss": 0.3497,
      "step": 4026
    },
    {
      "epoch": 0.1446,
      "grad_norm": 0.7264822125434875,
      "learning_rate": 1.1098804967077425e-06,
      "loss": 0.3531,
      "step": 4027
    },
    {
      "epoch": 0.1448,
      "grad_norm": 0.43501096963882446,
      "learning_rate": 1.1076884921648834e-06,
      "loss": 0.3247,
      "step": 4028
    },
    {
      "epoch": 0.145,
      "grad_norm": 0.6013414263725281,
      "learning_rate": 1.1054983846873684e-06,
      "loss": 0.3208,
      "step": 4029
    },
    {
      "epoch": 0.1452,
      "grad_norm": 0.5425536632537842,
      "learning_rate": 1.1033101753426285e-06,
      "loss": 0.3239,
      "step": 4030
    },
    {
      "epoch": 0.1454,
      "grad_norm": 0.4528934955596924,
      "learning_rate": 1.1011238651971744e-06,
      "loss": 0.3286,
      "step": 4031
    },
    {
      "epoch": 0.1456,
      "grad_norm": 0.5648215413093567,
      "learning_rate": 1.0989394553165833e-06,
      "loss": 0.317,
      "step": 4032
    },
    {
      "epoch": 0.1458,
      "grad_norm": 0.5503842234611511,
      "learning_rate": 1.0967569467655104e-06,
      "loss": 0.3005,
      "step": 4033
    },
    {
      "epoch": 0.146,
      "grad_norm": 0.530215859413147,
      "learning_rate": 1.0945763406076837e-06,
      "loss": 0.3365,
      "step": 4034
    },
    {
      "epoch": 0.1462,
      "grad_norm": 0.49376294016838074,
      "learning_rate": 1.0923976379059059e-06,
      "loss": 0.2995,
      "step": 4035
    },
    {
      "epoch": 0.1464,
      "grad_norm": 0.48339444398880005,
      "learning_rate": 1.09022083972205e-06,
      "loss": 0.3406,
      "step": 4036
    },
    {
      "epoch": 0.1466,
      "grad_norm": 0.45498886704444885,
      "learning_rate": 1.0880459471170597e-06,
      "loss": 0.3076,
      "step": 4037
    },
    {
      "epoch": 0.1468,
      "grad_norm": 0.5831733345985413,
      "learning_rate": 1.0858729611509516e-06,
      "loss": 0.346,
      "step": 4038
    },
    {
      "epoch": 0.147,
      "grad_norm": 0.3985000550746918,
      "learning_rate": 1.0837018828828133e-06,
      "loss": 0.3301,
      "step": 4039
    },
    {
      "epoch": 0.1472,
      "grad_norm": 0.4855625629425049,
      "learning_rate": 1.0815327133708015e-06,
      "loss": 0.3706,
      "step": 4040
    },
    {
      "epoch": 0.1474,
      "grad_norm": 0.4672054052352905,
      "learning_rate": 1.0793654536721432e-06,
      "loss": 0.3268,
      "step": 4041
    },
    {
      "epoch": 0.1476,
      "grad_norm": 0.41936013102531433,
      "learning_rate": 1.077200104843134e-06,
      "loss": 0.3176,
      "step": 4042
    },
    {
      "epoch": 0.1478,
      "grad_norm": 0.7152717709541321,
      "learning_rate": 1.0750366679391393e-06,
      "loss": 0.3504,
      "step": 4043
    },
    {
      "epoch": 0.148,
      "grad_norm": 0.3755528926849365,
      "learning_rate": 1.0728751440145907e-06,
      "loss": 0.3144,
      "step": 4044
    },
    {
      "epoch": 0.1482,
      "grad_norm": 0.39718520641326904,
      "learning_rate": 1.0707155341229902e-06,
      "loss": 0.2952,
      "step": 4045
    },
    {
      "epoch": 0.1484,
      "grad_norm": 1.0552222728729248,
      "learning_rate": 1.0685578393169054e-06,
      "loss": 0.3545,
      "step": 4046
    },
    {
      "epoch": 0.1486,
      "grad_norm": 0.39825180172920227,
      "learning_rate": 1.0664020606479702e-06,
      "loss": 0.3198,
      "step": 4047
    },
    {
      "epoch": 0.1488,
      "grad_norm": 0.4346916377544403,
      "learning_rate": 1.064248199166884e-06,
      "loss": 0.3231,
      "step": 4048
    },
    {
      "epoch": 0.149,
      "grad_norm": 0.7579618096351624,
      "learning_rate": 1.0620962559234144e-06,
      "loss": 0.3454,
      "step": 4049
    },
    {
      "epoch": 0.1492,
      "grad_norm": 0.6288596987724304,
      "learning_rate": 1.0599462319663906e-06,
      "loss": 0.3474,
      "step": 4050
    },
    {
      "epoch": 0.1494,
      "grad_norm": 0.4851032793521881,
      "learning_rate": 1.0577981283437095e-06,
      "loss": 0.3157,
      "step": 4051
    },
    {
      "epoch": 0.1496,
      "grad_norm": 0.5262159109115601,
      "learning_rate": 1.0556519461023301e-06,
      "loss": 0.3068,
      "step": 4052
    },
    {
      "epoch": 0.1498,
      "grad_norm": 0.4498942494392395,
      "learning_rate": 1.053507686288276e-06,
      "loss": 0.3352,
      "step": 4053
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.4961492121219635,
      "learning_rate": 1.0513653499466315e-06,
      "loss": 0.2854,
      "step": 4054
    },
    {
      "epoch": 0.1502,
      "grad_norm": 0.41319096088409424,
      "learning_rate": 1.049224938121548e-06,
      "loss": 0.2983,
      "step": 4055
    },
    {
      "epoch": 0.1504,
      "grad_norm": 0.6417260766029358,
      "learning_rate": 1.047086451856235e-06,
      "loss": 0.3358,
      "step": 4056
    },
    {
      "epoch": 0.1506,
      "grad_norm": 0.4457988739013672,
      "learning_rate": 1.0449498921929669e-06,
      "loss": 0.3418,
      "step": 4057
    },
    {
      "epoch": 0.1508,
      "grad_norm": 0.4929046630859375,
      "learning_rate": 1.0428152601730718e-06,
      "loss": 0.3401,
      "step": 4058
    },
    {
      "epoch": 0.151,
      "grad_norm": 0.38892805576324463,
      "learning_rate": 1.0406825568369478e-06,
      "loss": 0.3016,
      "step": 4059
    },
    {
      "epoch": 0.1512,
      "grad_norm": 0.48367148637771606,
      "learning_rate": 1.0385517832240472e-06,
      "loss": 0.3181,
      "step": 4060
    },
    {
      "epoch": 0.1514,
      "grad_norm": 0.3779263496398926,
      "learning_rate": 1.036422940372883e-06,
      "loss": 0.33,
      "step": 4061
    },
    {
      "epoch": 0.1516,
      "grad_norm": 0.44899800419807434,
      "learning_rate": 1.0342960293210281e-06,
      "loss": 0.3681,
      "step": 4062
    },
    {
      "epoch": 0.1518,
      "grad_norm": 0.49471375346183777,
      "learning_rate": 1.0321710511051108e-06,
      "loss": 0.3188,
      "step": 4063
    },
    {
      "epoch": 0.152,
      "grad_norm": 0.5813892483711243,
      "learning_rate": 1.0300480067608232e-06,
      "loss": 0.3497,
      "step": 4064
    },
    {
      "epoch": 0.1522,
      "grad_norm": 0.7721248269081116,
      "learning_rate": 1.0279268973229089e-06,
      "loss": 0.3319,
      "step": 4065
    },
    {
      "epoch": 0.1524,
      "grad_norm": 0.5973314642906189,
      "learning_rate": 1.0258077238251735e-06,
      "loss": 0.3229,
      "step": 4066
    },
    {
      "epoch": 0.1526,
      "grad_norm": 0.5387126803398132,
      "learning_rate": 1.0236904873004722e-06,
      "loss": 0.3219,
      "step": 4067
    },
    {
      "epoch": 0.1528,
      "grad_norm": 0.4566875100135803,
      "learning_rate": 1.0215751887807228e-06,
      "loss": 0.3232,
      "step": 4068
    },
    {
      "epoch": 0.153,
      "grad_norm": 0.4556554853916168,
      "learning_rate": 1.0194618292968972e-06,
      "loss": 0.3216,
      "step": 4069
    },
    {
      "epoch": 0.1532,
      "grad_norm": 0.8839408755302429,
      "learning_rate": 1.0173504098790188e-06,
      "loss": 0.3374,
      "step": 4070
    },
    {
      "epoch": 0.1534,
      "grad_norm": 0.8320318460464478,
      "learning_rate": 1.0152409315561696e-06,
      "loss": 0.3417,
      "step": 4071
    },
    {
      "epoch": 0.1536,
      "grad_norm": 0.4160752296447754,
      "learning_rate": 1.0131333953564825e-06,
      "loss": 0.3461,
      "step": 4072
    },
    {
      "epoch": 0.1538,
      "grad_norm": 0.4674762487411499,
      "learning_rate": 1.0110278023071445e-06,
      "loss": 0.3392,
      "step": 4073
    },
    {
      "epoch": 0.154,
      "grad_norm": 0.4217280447483063,
      "learning_rate": 1.0089241534343986e-06,
      "loss": 0.3394,
      "step": 4074
    },
    {
      "epoch": 0.1542,
      "grad_norm": 0.5338664650917053,
      "learning_rate": 1.006822449763537e-06,
      "loss": 0.3584,
      "step": 4075
    },
    {
      "epoch": 0.1544,
      "grad_norm": 0.5100220441818237,
      "learning_rate": 1.0047226923189024e-06,
      "loss": 0.3393,
      "step": 4076
    },
    {
      "epoch": 0.1546,
      "grad_norm": 0.6075713634490967,
      "learning_rate": 1.0026248821238915e-06,
      "loss": 0.3357,
      "step": 4077
    },
    {
      "epoch": 0.1548,
      "grad_norm": 0.5471237301826477,
      "learning_rate": 1.0005290202009533e-06,
      "loss": 0.3361,
      "step": 4078
    },
    {
      "epoch": 0.155,
      "grad_norm": 0.49208641052246094,
      "learning_rate": 9.984351075715848e-07,
      "loss": 0.335,
      "step": 4079
    },
    {
      "epoch": 0.1552,
      "grad_norm": 0.4436572790145874,
      "learning_rate": 9.963431452563331e-07,
      "loss": 0.3193,
      "step": 4080
    },
    {
      "epoch": 0.1554,
      "grad_norm": 0.5073627829551697,
      "learning_rate": 9.942531342747953e-07,
      "loss": 0.3351,
      "step": 4081
    },
    {
      "epoch": 0.1556,
      "grad_norm": 0.5689370632171631,
      "learning_rate": 9.921650756456164e-07,
      "loss": 0.3457,
      "step": 4082
    },
    {
      "epoch": 0.1558,
      "grad_norm": 0.4261972904205322,
      "learning_rate": 9.900789703864933e-07,
      "loss": 0.3534,
      "step": 4083
    },
    {
      "epoch": 0.156,
      "grad_norm": 0.4942755699157715,
      "learning_rate": 9.879948195141681e-07,
      "loss": 0.3678,
      "step": 4084
    },
    {
      "epoch": 0.1562,
      "grad_norm": 0.48034417629241943,
      "learning_rate": 9.859126240444284e-07,
      "loss": 0.3145,
      "step": 4085
    },
    {
      "epoch": 0.1564,
      "grad_norm": 0.45775163173675537,
      "learning_rate": 9.838323849921123e-07,
      "loss": 0.3353,
      "step": 4086
    },
    {
      "epoch": 0.1566,
      "grad_norm": 0.6124879717826843,
      "learning_rate": 9.81754103371101e-07,
      "loss": 0.3613,
      "step": 4087
    },
    {
      "epoch": 0.1568,
      "grad_norm": 0.428538054227829,
      "learning_rate": 9.79677780194327e-07,
      "loss": 0.3289,
      "step": 4088
    },
    {
      "epoch": 0.157,
      "grad_norm": 0.46972060203552246,
      "learning_rate": 9.77603416473763e-07,
      "loss": 0.3258,
      "step": 4089
    },
    {
      "epoch": 0.1572,
      "grad_norm": 0.4956949055194855,
      "learning_rate": 9.7553101322043e-07,
      "loss": 0.3462,
      "step": 4090
    },
    {
      "epoch": 0.1574,
      "grad_norm": 0.39581045508384705,
      "learning_rate": 9.734605714443906e-07,
      "loss": 0.3259,
      "step": 4091
    },
    {
      "epoch": 0.1576,
      "grad_norm": 0.5687603950500488,
      "learning_rate": 9.713920921547532e-07,
      "loss": 0.3331,
      "step": 4092
    },
    {
      "epoch": 0.1578,
      "grad_norm": 0.7665534019470215,
      "learning_rate": 9.69325576359672e-07,
      "loss": 0.3343,
      "step": 4093
    },
    {
      "epoch": 0.158,
      "grad_norm": 0.5262203812599182,
      "learning_rate": 9.67261025066339e-07,
      "loss": 0.3147,
      "step": 4094
    },
    {
      "epoch": 0.1582,
      "grad_norm": 0.4726792871952057,
      "learning_rate": 9.651984392809916e-07,
      "loss": 0.3514,
      "step": 4095
    },
    {
      "epoch": 0.1584,
      "grad_norm": 0.40167784690856934,
      "learning_rate": 9.631378200089082e-07,
      "loss": 0.2953,
      "step": 4096
    },
    {
      "epoch": 0.1586,
      "grad_norm": 0.5707964301109314,
      "learning_rate": 9.610791682544123e-07,
      "loss": 0.3214,
      "step": 4097
    },
    {
      "epoch": 0.1588,
      "grad_norm": 0.5136600732803345,
      "learning_rate": 9.590224850208645e-07,
      "loss": 0.2962,
      "step": 4098
    },
    {
      "epoch": 0.159,
      "grad_norm": 0.494357705116272,
      "learning_rate": 9.569677713106673e-07,
      "loss": 0.3541,
      "step": 4099
    },
    {
      "epoch": 0.1592,
      "grad_norm": 0.41720521450042725,
      "learning_rate": 9.549150281252633e-07,
      "loss": 0.3498,
      "step": 4100
    },
    {
      "epoch": 0.1594,
      "grad_norm": 0.40617436170578003,
      "learning_rate": 9.528642564651341e-07,
      "loss": 0.3285,
      "step": 4101
    },
    {
      "epoch": 0.1596,
      "grad_norm": 0.48208674788475037,
      "learning_rate": 9.508154573298012e-07,
      "loss": 0.3508,
      "step": 4102
    },
    {
      "epoch": 0.1598,
      "grad_norm": 0.48714208602905273,
      "learning_rate": 9.487686317178241e-07,
      "loss": 0.3222,
      "step": 4103
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.476303368806839,
      "learning_rate": 9.467237806268009e-07,
      "loss": 0.3284,
      "step": 4104
    },
    {
      "epoch": 0.1602,
      "grad_norm": 0.8222754597663879,
      "learning_rate": 9.446809050533679e-07,
      "loss": 0.3558,
      "step": 4105
    },
    {
      "epoch": 0.1604,
      "grad_norm": 0.4568268060684204,
      "learning_rate": 9.426400059931956e-07,
      "loss": 0.3493,
      "step": 4106
    },
    {
      "epoch": 0.1606,
      "grad_norm": 0.59563148021698,
      "learning_rate": 9.406010844409957e-07,
      "loss": 0.3735,
      "step": 4107
    },
    {
      "epoch": 0.1608,
      "grad_norm": 0.4053300619125366,
      "learning_rate": 9.385641413905139e-07,
      "loss": 0.3126,
      "step": 4108
    },
    {
      "epoch": 0.161,
      "grad_norm": 0.485772967338562,
      "learning_rate": 9.365291778345303e-07,
      "loss": 0.3334,
      "step": 4109
    },
    {
      "epoch": 0.1612,
      "grad_norm": 0.5048031806945801,
      "learning_rate": 9.344961947648624e-07,
      "loss": 0.3143,
      "step": 4110
    },
    {
      "epoch": 0.1614,
      "grad_norm": 0.3791309893131256,
      "learning_rate": 9.3246519317236e-07,
      "loss": 0.3292,
      "step": 4111
    },
    {
      "epoch": 0.1616,
      "grad_norm": 0.8904507756233215,
      "learning_rate": 9.304361740469103e-07,
      "loss": 0.3602,
      "step": 4112
    },
    {
      "epoch": 0.1618,
      "grad_norm": 0.7408453822135925,
      "learning_rate": 9.284091383774313e-07,
      "loss": 0.3342,
      "step": 4113
    },
    {
      "epoch": 0.162,
      "grad_norm": 0.9288884997367859,
      "learning_rate": 9.263840871518759e-07,
      "loss": 0.3183,
      "step": 4114
    },
    {
      "epoch": 0.1622,
      "grad_norm": 0.43700459599494934,
      "learning_rate": 9.243610213572285e-07,
      "loss": 0.3088,
      "step": 4115
    },
    {
      "epoch": 0.1624,
      "grad_norm": 0.5141071677207947,
      "learning_rate": 9.223399419795093e-07,
      "loss": 0.3177,
      "step": 4116
    },
    {
      "epoch": 0.1626,
      "grad_norm": 0.44177699089050293,
      "learning_rate": 9.203208500037664e-07,
      "loss": 0.3448,
      "step": 4117
    },
    {
      "epoch": 0.1628,
      "grad_norm": 0.6114684343338013,
      "learning_rate": 9.183037464140804e-07,
      "loss": 0.337,
      "step": 4118
    },
    {
      "epoch": 0.163,
      "grad_norm": 0.36222368478775024,
      "learning_rate": 9.162886321935632e-07,
      "loss": 0.3016,
      "step": 4119
    },
    {
      "epoch": 0.1632,
      "grad_norm": 0.5417529940605164,
      "learning_rate": 9.142755083243577e-07,
      "loss": 0.3287,
      "step": 4120
    },
    {
      "epoch": 0.1634,
      "grad_norm": 0.44710037112236023,
      "learning_rate": 9.122643757876354e-07,
      "loss": 0.3344,
      "step": 4121
    },
    {
      "epoch": 0.1636,
      "grad_norm": 0.48916029930114746,
      "learning_rate": 9.10255235563598e-07,
      "loss": 0.3427,
      "step": 4122
    },
    {
      "epoch": 0.1638,
      "grad_norm": 0.3739621043205261,
      "learning_rate": 9.08248088631476e-07,
      "loss": 0.2981,
      "step": 4123
    },
    {
      "epoch": 0.164,
      "grad_norm": 0.4085814952850342,
      "learning_rate": 9.06242935969528e-07,
      "loss": 0.3405,
      "step": 4124
    },
    {
      "epoch": 0.1642,
      "grad_norm": 0.4466877281665802,
      "learning_rate": 9.042397785550405e-07,
      "loss": 0.3344,
      "step": 4125
    },
    {
      "epoch": 0.1644,
      "grad_norm": 0.6079236268997192,
      "learning_rate": 9.022386173643305e-07,
      "loss": 0.3336,
      "step": 4126
    },
    {
      "epoch": 0.1646,
      "grad_norm": 0.4911084473133087,
      "learning_rate": 9.002394533727382e-07,
      "loss": 0.3267,
      "step": 4127
    },
    {
      "epoch": 0.1648,
      "grad_norm": 0.583573043346405,
      "learning_rate": 8.982422875546332e-07,
      "loss": 0.335,
      "step": 4128
    },
    {
      "epoch": 0.165,
      "grad_norm": 0.5865021347999573,
      "learning_rate": 8.962471208834056e-07,
      "loss": 0.3399,
      "step": 4129
    },
    {
      "epoch": 0.1652,
      "grad_norm": 0.9484260082244873,
      "learning_rate": 8.942539543314799e-07,
      "loss": 0.3244,
      "step": 4130
    },
    {
      "epoch": 0.1654,
      "grad_norm": 0.4212234914302826,
      "learning_rate": 8.922627888703e-07,
      "loss": 0.3212,
      "step": 4131
    },
    {
      "epoch": 0.1656,
      "grad_norm": 0.46344423294067383,
      "learning_rate": 8.902736254703347e-07,
      "loss": 0.3406,
      "step": 4132
    },
    {
      "epoch": 0.1658,
      "grad_norm": 0.3937184512615204,
      "learning_rate": 8.882864651010798e-07,
      "loss": 0.3094,
      "step": 4133
    },
    {
      "epoch": 0.166,
      "grad_norm": 0.48406580090522766,
      "learning_rate": 8.863013087310502e-07,
      "loss": 0.3504,
      "step": 4134
    },
    {
      "epoch": 0.1662,
      "grad_norm": 0.3853952884674072,
      "learning_rate": 8.843181573277904e-07,
      "loss": 0.3393,
      "step": 4135
    },
    {
      "epoch": 0.1664,
      "grad_norm": 0.4609629511833191,
      "learning_rate": 8.823370118578628e-07,
      "loss": 0.3196,
      "step": 4136
    },
    {
      "epoch": 0.1666,
      "grad_norm": 0.38491398096084595,
      "learning_rate": 8.803578732868545e-07,
      "loss": 0.3352,
      "step": 4137
    },
    {
      "epoch": 0.1668,
      "grad_norm": 0.4364599883556366,
      "learning_rate": 8.783807425793722e-07,
      "loss": 0.3276,
      "step": 4138
    },
    {
      "epoch": 0.167,
      "grad_norm": 0.4755050241947174,
      "learning_rate": 8.764056206990446e-07,
      "loss": 0.3266,
      "step": 4139
    },
    {
      "epoch": 0.1672,
      "grad_norm": 0.4339071214199066,
      "learning_rate": 8.744325086085248e-07,
      "loss": 0.3521,
      "step": 4140
    },
    {
      "epoch": 0.1674,
      "grad_norm": 0.4682512581348419,
      "learning_rate": 8.72461407269482e-07,
      "loss": 0.3039,
      "step": 4141
    },
    {
      "epoch": 0.1676,
      "grad_norm": 0.44356250762939453,
      "learning_rate": 8.704923176426072e-07,
      "loss": 0.3402,
      "step": 4142
    },
    {
      "epoch": 0.1678,
      "grad_norm": 0.4518812298774719,
      "learning_rate": 8.685252406876116e-07,
      "loss": 0.3021,
      "step": 4143
    },
    {
      "epoch": 0.168,
      "grad_norm": 0.5475386381149292,
      "learning_rate": 8.665601773632226e-07,
      "loss": 0.3075,
      "step": 4144
    },
    {
      "epoch": 0.1682,
      "grad_norm": 0.5443083047866821,
      "learning_rate": 8.645971286271903e-07,
      "loss": 0.3454,
      "step": 4145
    },
    {
      "epoch": 0.1684,
      "grad_norm": 0.4554557204246521,
      "learning_rate": 8.626360954362817e-07,
      "loss": 0.3552,
      "step": 4146
    },
    {
      "epoch": 0.1686,
      "grad_norm": 0.5185648202896118,
      "learning_rate": 8.606770787462776e-07,
      "loss": 0.3175,
      "step": 4147
    },
    {
      "epoch": 0.1688,
      "grad_norm": 0.546196699142456,
      "learning_rate": 8.587200795119793e-07,
      "loss": 0.3517,
      "step": 4148
    },
    {
      "epoch": 0.169,
      "grad_norm": 0.4841652810573578,
      "learning_rate": 8.567650986872061e-07,
      "loss": 0.3314,
      "step": 4149
    },
    {
      "epoch": 0.1692,
      "grad_norm": 0.7829350829124451,
      "learning_rate": 8.54812137224792e-07,
      "loss": 0.3319,
      "step": 4150
    },
    {
      "epoch": 0.1694,
      "grad_norm": 0.35581210255622864,
      "learning_rate": 8.528611960765853e-07,
      "loss": 0.2889,
      "step": 4151
    },
    {
      "epoch": 0.1696,
      "grad_norm": 0.44613394141197205,
      "learning_rate": 8.509122761934519e-07,
      "loss": 0.3128,
      "step": 4152
    },
    {
      "epoch": 0.1698,
      "grad_norm": 0.4932778775691986,
      "learning_rate": 8.489653785252711e-07,
      "loss": 0.3428,
      "step": 4153
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.4471854567527771,
      "learning_rate": 8.470205040209362e-07,
      "loss": 0.3334,
      "step": 4154
    },
    {
      "epoch": 0.1702,
      "grad_norm": 0.4672234356403351,
      "learning_rate": 8.450776536283594e-07,
      "loss": 0.3768,
      "step": 4155
    },
    {
      "epoch": 0.1704,
      "grad_norm": 0.6407728791236877,
      "learning_rate": 8.431368282944585e-07,
      "loss": 0.3252,
      "step": 4156
    },
    {
      "epoch": 0.1706,
      "grad_norm": 1.0064738988876343,
      "learning_rate": 8.411980289651689e-07,
      "loss": 0.3357,
      "step": 4157
    },
    {
      "epoch": 0.1708,
      "grad_norm": 0.5494783520698547,
      "learning_rate": 8.392612565854374e-07,
      "loss": 0.3587,
      "step": 4158
    },
    {
      "epoch": 0.171,
      "grad_norm": 0.3895999789237976,
      "learning_rate": 8.373265120992252e-07,
      "loss": 0.3286,
      "step": 4159
    },
    {
      "epoch": 0.1712,
      "grad_norm": 0.4431615471839905,
      "learning_rate": 8.353937964495029e-07,
      "loss": 0.3319,
      "step": 4160
    },
    {
      "epoch": 0.1714,
      "grad_norm": 0.38219135999679565,
      "learning_rate": 8.334631105782515e-07,
      "loss": 0.31,
      "step": 4161
    },
    {
      "epoch": 0.1716,
      "grad_norm": 0.5133519172668457,
      "learning_rate": 8.315344554264643e-07,
      "loss": 0.3244,
      "step": 4162
    },
    {
      "epoch": 0.1718,
      "grad_norm": 0.41689470410346985,
      "learning_rate": 8.296078319341444e-07,
      "loss": 0.315,
      "step": 4163
    },
    {
      "epoch": 0.172,
      "grad_norm": 0.48168909549713135,
      "learning_rate": 8.276832410403051e-07,
      "loss": 0.3296,
      "step": 4164
    },
    {
      "epoch": 0.1722,
      "grad_norm": 0.45348185300827026,
      "learning_rate": 8.25760683682968e-07,
      "loss": 0.3587,
      "step": 4165
    },
    {
      "epoch": 0.1724,
      "grad_norm": 0.6795768737792969,
      "learning_rate": 8.238401607991647e-07,
      "loss": 0.3508,
      "step": 4166
    },
    {
      "epoch": 0.1726,
      "grad_norm": 0.38710740208625793,
      "learning_rate": 8.21921673324933e-07,
      "loss": 0.3102,
      "step": 4167
    },
    {
      "epoch": 0.1728,
      "grad_norm": 0.3979049623012543,
      "learning_rate": 8.200052221953231e-07,
      "loss": 0.3164,
      "step": 4168
    },
    {
      "epoch": 0.173,
      "grad_norm": 0.43946194648742676,
      "learning_rate": 8.180908083443884e-07,
      "loss": 0.3403,
      "step": 4169
    },
    {
      "epoch": 0.1732,
      "grad_norm": 1.07711660861969,
      "learning_rate": 8.161784327051919e-07,
      "loss": 0.3095,
      "step": 4170
    },
    {
      "epoch": 0.1734,
      "grad_norm": 0.4134789705276489,
      "learning_rate": 8.142680962098016e-07,
      "loss": 0.3136,
      "step": 4171
    },
    {
      "epoch": 0.1736,
      "grad_norm": 0.43630585074424744,
      "learning_rate": 8.123597997892918e-07,
      "loss": 0.3287,
      "step": 4172
    },
    {
      "epoch": 0.1738,
      "grad_norm": 0.6216702461242676,
      "learning_rate": 8.104535443737438e-07,
      "loss": 0.3511,
      "step": 4173
    },
    {
      "epoch": 0.174,
      "grad_norm": 0.3711797595024109,
      "learning_rate": 8.085493308922432e-07,
      "loss": 0.2964,
      "step": 4174
    },
    {
      "epoch": 0.1742,
      "grad_norm": 0.5246741771697998,
      "learning_rate": 8.066471602728804e-07,
      "loss": 0.3431,
      "step": 4175
    },
    {
      "epoch": 0.1744,
      "grad_norm": 0.3773944675922394,
      "learning_rate": 8.047470334427504e-07,
      "loss": 0.3098,
      "step": 4176
    },
    {
      "epoch": 0.1746,
      "grad_norm": 0.4509964883327484,
      "learning_rate": 8.028489513279503e-07,
      "loss": 0.3033,
      "step": 4177
    },
    {
      "epoch": 0.1748,
      "grad_norm": 0.4543502628803253,
      "learning_rate": 8.009529148535855e-07,
      "loss": 0.3374,
      "step": 4178
    },
    {
      "epoch": 0.175,
      "grad_norm": 0.39049074053764343,
      "learning_rate": 7.990589249437591e-07,
      "loss": 0.3029,
      "step": 4179
    },
    {
      "epoch": 0.1752,
      "grad_norm": 0.4311857223510742,
      "learning_rate": 7.971669825215789e-07,
      "loss": 0.3462,
      "step": 4180
    },
    {
      "epoch": 0.1754,
      "grad_norm": 0.5172101855278015,
      "learning_rate": 7.952770885091548e-07,
      "loss": 0.3175,
      "step": 4181
    },
    {
      "epoch": 0.1756,
      "grad_norm": 0.42442554235458374,
      "learning_rate": 7.933892438275987e-07,
      "loss": 0.329,
      "step": 4182
    },
    {
      "epoch": 0.1758,
      "grad_norm": 0.37036147713661194,
      "learning_rate": 7.91503449397022e-07,
      "loss": 0.3237,
      "step": 4183
    },
    {
      "epoch": 0.176,
      "grad_norm": 0.6183568239212036,
      "learning_rate": 7.89619706136539e-07,
      "loss": 0.3072,
      "step": 4184
    },
    {
      "epoch": 0.1762,
      "grad_norm": 0.4003663659095764,
      "learning_rate": 7.877380149642628e-07,
      "loss": 0.3172,
      "step": 4185
    },
    {
      "epoch": 0.1764,
      "grad_norm": 0.4506908059120178,
      "learning_rate": 7.858583767973071e-07,
      "loss": 0.3261,
      "step": 4186
    },
    {
      "epoch": 0.1766,
      "grad_norm": 0.5902057886123657,
      "learning_rate": 7.839807925517834e-07,
      "loss": 0.3384,
      "step": 4187
    },
    {
      "epoch": 0.1768,
      "grad_norm": 0.4630206525325775,
      "learning_rate": 7.821052631428061e-07,
      "loss": 0.3202,
      "step": 4188
    },
    {
      "epoch": 0.177,
      "grad_norm": 0.524746298789978,
      "learning_rate": 7.802317894844835e-07,
      "loss": 0.3004,
      "step": 4189
    },
    {
      "epoch": 0.1772,
      "grad_norm": 0.41191667318344116,
      "learning_rate": 7.783603724899258e-07,
      "loss": 0.3218,
      "step": 4190
    },
    {
      "epoch": 0.1774,
      "grad_norm": 0.40045857429504395,
      "learning_rate": 7.76491013071235e-07,
      "loss": 0.3295,
      "step": 4191
    },
    {
      "epoch": 0.1776,
      "grad_norm": 0.3683210015296936,
      "learning_rate": 7.746237121395184e-07,
      "loss": 0.3255,
      "step": 4192
    },
    {
      "epoch": 0.1778,
      "grad_norm": 0.9031592011451721,
      "learning_rate": 7.727584706048735e-07,
      "loss": 0.3158,
      "step": 4193
    },
    {
      "epoch": 0.178,
      "grad_norm": 0.6220213770866394,
      "learning_rate": 7.708952893763972e-07,
      "loss": 0.3343,
      "step": 4194
    },
    {
      "epoch": 0.1782,
      "grad_norm": 0.37844765186309814,
      "learning_rate": 7.690341693621805e-07,
      "loss": 0.3267,
      "step": 4195
    },
    {
      "epoch": 0.1784,
      "grad_norm": 0.46119025349617004,
      "learning_rate": 7.671751114693104e-07,
      "loss": 0.332,
      "step": 4196
    },
    {
      "epoch": 0.1786,
      "grad_norm": 0.43968144059181213,
      "learning_rate": 7.653181166038715e-07,
      "loss": 0.377,
      "step": 4197
    },
    {
      "epoch": 0.1788,
      "grad_norm": 0.529751181602478,
      "learning_rate": 7.63463185670939e-07,
      "loss": 0.303,
      "step": 4198
    },
    {
      "epoch": 0.179,
      "grad_norm": 0.3810611963272095,
      "learning_rate": 7.61610319574585e-07,
      "loss": 0.3295,
      "step": 4199
    },
    {
      "epoch": 0.1792,
      "grad_norm": 0.4032723307609558,
      "learning_rate": 7.597595192178702e-07,
      "loss": 0.3343,
      "step": 4200
    },
    {
      "epoch": 0.1794,
      "grad_norm": 0.4092893600463867,
      "learning_rate": 7.579107855028562e-07,
      "loss": 0.3032,
      "step": 4201
    },
    {
      "epoch": 0.1796,
      "grad_norm": 0.8454665541648865,
      "learning_rate": 7.560641193305912e-07,
      "loss": 0.3044,
      "step": 4202
    },
    {
      "epoch": 0.1798,
      "grad_norm": 2.568070650100708,
      "learning_rate": 7.542195216011188e-07,
      "loss": 0.325,
      "step": 4203
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.41539451479911804,
      "learning_rate": 7.523769932134739e-07,
      "loss": 0.355,
      "step": 4204
    },
    {
      "epoch": 0.1802,
      "grad_norm": 0.42245131731033325,
      "learning_rate": 7.505365350656813e-07,
      "loss": 0.3124,
      "step": 4205
    },
    {
      "epoch": 0.1804,
      "grad_norm": 0.4458777904510498,
      "learning_rate": 7.486981480547567e-07,
      "loss": 0.3465,
      "step": 4206
    },
    {
      "epoch": 0.1806,
      "grad_norm": 0.38280248641967773,
      "learning_rate": 7.468618330767114e-07,
      "loss": 0.3131,
      "step": 4207
    },
    {
      "epoch": 0.1808,
      "grad_norm": 0.4207287132740021,
      "learning_rate": 7.450275910265415e-07,
      "loss": 0.3656,
      "step": 4208
    },
    {
      "epoch": 0.181,
      "grad_norm": 0.38232171535491943,
      "learning_rate": 7.43195422798233e-07,
      "loss": 0.3503,
      "step": 4209
    },
    {
      "epoch": 0.1812,
      "grad_norm": 0.5226112008094788,
      "learning_rate": 7.413653292847617e-07,
      "loss": 0.3523,
      "step": 4210
    },
    {
      "epoch": 0.1814,
      "grad_norm": 0.5234969854354858,
      "learning_rate": 7.395373113780962e-07,
      "loss": 0.3598,
      "step": 4211
    },
    {
      "epoch": 0.1816,
      "grad_norm": 0.39401721954345703,
      "learning_rate": 7.377113699691879e-07,
      "loss": 0.3525,
      "step": 4212
    },
    {
      "epoch": 0.1818,
      "grad_norm": 0.4804239273071289,
      "learning_rate": 7.358875059479792e-07,
      "loss": 0.3383,
      "step": 4213
    },
    {
      "epoch": 0.182,
      "grad_norm": 0.5808135867118835,
      "learning_rate": 7.34065720203399e-07,
      "loss": 0.3144,
      "step": 4214
    },
    {
      "epoch": 0.1822,
      "grad_norm": 0.5226419568061829,
      "learning_rate": 7.322460136233622e-07,
      "loss": 0.3711,
      "step": 4215
    },
    {
      "epoch": 0.1824,
      "grad_norm": 0.4815920293331146,
      "learning_rate": 7.304283870947748e-07,
      "loss": 0.345,
      "step": 4216
    },
    {
      "epoch": 0.1826,
      "grad_norm": 0.5922989249229431,
      "learning_rate": 7.286128415035249e-07,
      "loss": 0.3322,
      "step": 4217
    },
    {
      "epoch": 0.1828,
      "grad_norm": 0.510578453540802,
      "learning_rate": 7.267993777344856e-07,
      "loss": 0.3392,
      "step": 4218
    },
    {
      "epoch": 0.183,
      "grad_norm": 0.4938342571258545,
      "learning_rate": 7.249879966715174e-07,
      "loss": 0.381,
      "step": 4219
    },
    {
      "epoch": 0.1832,
      "grad_norm": 0.4712236225605011,
      "learning_rate": 7.23178699197467e-07,
      "loss": 0.3394,
      "step": 4220
    },
    {
      "epoch": 0.1834,
      "grad_norm": 0.38635963201522827,
      "learning_rate": 7.213714861941628e-07,
      "loss": 0.3243,
      "step": 4221
    },
    {
      "epoch": 0.1836,
      "grad_norm": 0.4652073383331299,
      "learning_rate": 7.195663585424195e-07,
      "loss": 0.3535,
      "step": 4222
    },
    {
      "epoch": 0.1838,
      "grad_norm": 0.48306000232696533,
      "learning_rate": 7.177633171220339e-07,
      "loss": 0.3335,
      "step": 4223
    },
    {
      "epoch": 0.184,
      "grad_norm": 0.5026137828826904,
      "learning_rate": 7.159623628117856e-07,
      "loss": 0.313,
      "step": 4224
    },
    {
      "epoch": 0.1842,
      "grad_norm": 0.4048404395580292,
      "learning_rate": 7.141634964894389e-07,
      "loss": 0.3196,
      "step": 4225
    },
    {
      "epoch": 0.1844,
      "grad_norm": 0.44570332765579224,
      "learning_rate": 7.123667190317396e-07,
      "loss": 0.3564,
      "step": 4226
    },
    {
      "epoch": 0.1846,
      "grad_norm": 0.4853290021419525,
      "learning_rate": 7.105720313144143e-07,
      "loss": 0.3297,
      "step": 4227
    },
    {
      "epoch": 0.1848,
      "grad_norm": 0.35463351011276245,
      "learning_rate": 7.087794342121724e-07,
      "loss": 0.3094,
      "step": 4228
    },
    {
      "epoch": 0.185,
      "grad_norm": 0.4485372304916382,
      "learning_rate": 7.069889285987025e-07,
      "loss": 0.3741,
      "step": 4229
    },
    {
      "epoch": 0.1852,
      "grad_norm": 0.4001222550868988,
      "learning_rate": 7.052005153466779e-07,
      "loss": 0.3184,
      "step": 4230
    },
    {
      "epoch": 0.1854,
      "grad_norm": 0.4447455108165741,
      "learning_rate": 7.034141953277484e-07,
      "loss": 0.3204,
      "step": 4231
    },
    {
      "epoch": 0.1856,
      "grad_norm": 0.6049482822418213,
      "learning_rate": 7.01629969412545e-07,
      "loss": 0.3355,
      "step": 4232
    },
    {
      "epoch": 0.1858,
      "grad_norm": 0.6523964405059814,
      "learning_rate": 6.99847838470677e-07,
      "loss": 0.3407,
      "step": 4233
    },
    {
      "epoch": 0.186,
      "grad_norm": 0.501699686050415,
      "learning_rate": 6.980678033707333e-07,
      "loss": 0.3214,
      "step": 4234
    },
    {
      "epoch": 0.1862,
      "grad_norm": 0.5105911493301392,
      "learning_rate": 6.962898649802824e-07,
      "loss": 0.3517,
      "step": 4235
    },
    {
      "epoch": 0.1864,
      "grad_norm": 1.798464298248291,
      "learning_rate": 6.945140241658688e-07,
      "loss": 0.3298,
      "step": 4236
    },
    {
      "epoch": 0.1866,
      "grad_norm": 0.43306228518486023,
      "learning_rate": 6.927402817930168e-07,
      "loss": 0.3008,
      "step": 4237
    },
    {
      "epoch": 0.1868,
      "grad_norm": 0.5078611969947815,
      "learning_rate": 6.909686387262255e-07,
      "loss": 0.3132,
      "step": 4238
    },
    {
      "epoch": 0.187,
      "grad_norm": 0.5211454033851624,
      "learning_rate": 6.891990958289724e-07,
      "loss": 0.3196,
      "step": 4239
    },
    {
      "epoch": 0.1872,
      "grad_norm": 0.42910122871398926,
      "learning_rate": 6.874316539637127e-07,
      "loss": 0.3083,
      "step": 4240
    },
    {
      "epoch": 0.1874,
      "grad_norm": 0.4431501030921936,
      "learning_rate": 6.856663139918751e-07,
      "loss": 0.3259,
      "step": 4241
    },
    {
      "epoch": 0.1876,
      "grad_norm": 0.44149643182754517,
      "learning_rate": 6.839030767738653e-07,
      "loss": 0.3366,
      "step": 4242
    },
    {
      "epoch": 0.1878,
      "grad_norm": 0.3815474808216095,
      "learning_rate": 6.821419431690629e-07,
      "loss": 0.302,
      "step": 4243
    },
    {
      "epoch": 0.188,
      "grad_norm": 0.4178428649902344,
      "learning_rate": 6.803829140358237e-07,
      "loss": 0.3109,
      "step": 4244
    },
    {
      "epoch": 0.1882,
      "grad_norm": 0.4966701865196228,
      "learning_rate": 6.786259902314768e-07,
      "loss": 0.2996,
      "step": 4245
    },
    {
      "epoch": 0.1884,
      "grad_norm": 0.5173516869544983,
      "learning_rate": 6.768711726123261e-07,
      "loss": 0.3372,
      "step": 4246
    },
    {
      "epoch": 0.1886,
      "grad_norm": 0.9003250598907471,
      "learning_rate": 6.751184620336471e-07,
      "loss": 0.3188,
      "step": 4247
    },
    {
      "epoch": 0.1888,
      "grad_norm": 0.45962780714035034,
      "learning_rate": 6.733678593496901e-07,
      "loss": 0.3608,
      "step": 4248
    },
    {
      "epoch": 0.189,
      "grad_norm": 0.6226767301559448,
      "learning_rate": 6.716193654136788e-07,
      "loss": 0.3177,
      "step": 4249
    },
    {
      "epoch": 0.1892,
      "grad_norm": 0.7946039438247681,
      "learning_rate": 6.698729810778065e-07,
      "loss": 0.3396,
      "step": 4250
    },
    {
      "epoch": 0.1894,
      "grad_norm": 0.3778741657733917,
      "learning_rate": 6.681287071932408e-07,
      "loss": 0.2911,
      "step": 4251
    },
    {
      "epoch": 0.1896,
      "grad_norm": 0.4238417446613312,
      "learning_rate": 6.663865446101192e-07,
      "loss": 0.3257,
      "step": 4252
    },
    {
      "epoch": 0.1898,
      "grad_norm": 0.5387388467788696,
      "learning_rate": 6.646464941775499e-07,
      "loss": 0.3181,
      "step": 4253
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.40593552589416504,
      "learning_rate": 6.629085567436133e-07,
      "loss": 0.3327,
      "step": 4254
    },
    {
      "epoch": 0.1902,
      "grad_norm": 0.4995386600494385,
      "learning_rate": 6.611727331553585e-07,
      "loss": 0.364,
      "step": 4255
    },
    {
      "epoch": 0.1904,
      "grad_norm": 0.5271826386451721,
      "learning_rate": 6.594390242588044e-07,
      "loss": 0.3286,
      "step": 4256
    },
    {
      "epoch": 0.1906,
      "grad_norm": 0.4121954143047333,
      "learning_rate": 6.577074308989406e-07,
      "loss": 0.3173,
      "step": 4257
    },
    {
      "epoch": 0.1908,
      "grad_norm": 0.8675887584686279,
      "learning_rate": 6.559779539197231e-07,
      "loss": 0.3369,
      "step": 4258
    },
    {
      "epoch": 0.191,
      "grad_norm": 0.3597026765346527,
      "learning_rate": 6.542505941640803e-07,
      "loss": 0.3202,
      "step": 4259
    },
    {
      "epoch": 0.1912,
      "grad_norm": 0.44780388474464417,
      "learning_rate": 6.52525352473905e-07,
      "loss": 0.3248,
      "step": 4260
    },
    {
      "epoch": 0.1914,
      "grad_norm": 0.4772929251194,
      "learning_rate": 6.508022296900601e-07,
      "loss": 0.3174,
      "step": 4261
    },
    {
      "epoch": 0.1916,
      "grad_norm": 0.42489176988601685,
      "learning_rate": 6.490812266523716e-07,
      "loss": 0.3143,
      "step": 4262
    },
    {
      "epoch": 0.1918,
      "grad_norm": 0.400266170501709,
      "learning_rate": 6.47362344199639e-07,
      "loss": 0.3775,
      "step": 4263
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.41139933466911316,
      "learning_rate": 6.456455831696234e-07,
      "loss": 0.3249,
      "step": 4264
    },
    {
      "epoch": 0.1922,
      "grad_norm": 0.3988403379917145,
      "learning_rate": 6.439309443990532e-07,
      "loss": 0.3275,
      "step": 4265
    },
    {
      "epoch": 0.1924,
      "grad_norm": 0.5380271077156067,
      "learning_rate": 6.422184287236227e-07,
      "loss": 0.3422,
      "step": 4266
    },
    {
      "epoch": 0.1926,
      "grad_norm": 0.6228172183036804,
      "learning_rate": 6.405080369779898e-07,
      "loss": 0.3327,
      "step": 4267
    },
    {
      "epoch": 0.1928,
      "grad_norm": 0.4051739573478699,
      "learning_rate": 6.387997699957815e-07,
      "loss": 0.3134,
      "step": 4268
    },
    {
      "epoch": 0.193,
      "grad_norm": 0.4279744327068329,
      "learning_rate": 6.370936286095842e-07,
      "loss": 0.3257,
      "step": 4269
    },
    {
      "epoch": 0.1932,
      "grad_norm": 0.3764544427394867,
      "learning_rate": 6.353896136509524e-07,
      "loss": 0.3122,
      "step": 4270
    },
    {
      "epoch": 0.1934,
      "grad_norm": 0.405271977186203,
      "learning_rate": 6.336877259504004e-07,
      "loss": 0.3034,
      "step": 4271
    },
    {
      "epoch": 0.1936,
      "grad_norm": 0.48016923666000366,
      "learning_rate": 6.319879663374068e-07,
      "loss": 0.3397,
      "step": 4272
    },
    {
      "epoch": 0.1938,
      "grad_norm": 0.42574942111968994,
      "learning_rate": 6.302903356404161e-07,
      "loss": 0.3157,
      "step": 4273
    },
    {
      "epoch": 0.194,
      "grad_norm": 0.5416457653045654,
      "learning_rate": 6.28594834686832e-07,
      "loss": 0.3103,
      "step": 4274
    },
    {
      "epoch": 0.1942,
      "grad_norm": 0.4580998718738556,
      "learning_rate": 6.269014643030214e-07,
      "loss": 0.379,
      "step": 4275
    },
    {
      "epoch": 0.1944,
      "grad_norm": 0.4986753463745117,
      "learning_rate": 6.252102253143122e-07,
      "loss": 0.3625,
      "step": 4276
    },
    {
      "epoch": 0.1946,
      "grad_norm": 1.9725667238235474,
      "learning_rate": 6.235211185449919e-07,
      "loss": 0.3334,
      "step": 4277
    },
    {
      "epoch": 0.1948,
      "grad_norm": 0.4109601080417633,
      "learning_rate": 6.218341448183141e-07,
      "loss": 0.3581,
      "step": 4278
    },
    {
      "epoch": 0.195,
      "grad_norm": 0.7461367845535278,
      "learning_rate": 6.201493049564883e-07,
      "loss": 0.314,
      "step": 4279
    },
    {
      "epoch": 0.1952,
      "grad_norm": 0.42090922594070435,
      "learning_rate": 6.184665997806832e-07,
      "loss": 0.3249,
      "step": 4280
    },
    {
      "epoch": 0.1954,
      "grad_norm": 0.3801571726799011,
      "learning_rate": 6.167860301110284e-07,
      "loss": 0.314,
      "step": 4281
    },
    {
      "epoch": 0.1956,
      "grad_norm": 0.41147029399871826,
      "learning_rate": 6.151075967666165e-07,
      "loss": 0.309,
      "step": 4282
    },
    {
      "epoch": 0.1958,
      "grad_norm": 0.46382325887680054,
      "learning_rate": 6.134313005654929e-07,
      "loss": 0.3561,
      "step": 4283
    },
    {
      "epoch": 0.196,
      "grad_norm": 0.39419668912887573,
      "learning_rate": 6.117571423246655e-07,
      "loss": 0.3289,
      "step": 4284
    },
    {
      "epoch": 0.1962,
      "grad_norm": 0.4356108605861664,
      "learning_rate": 6.100851228600974e-07,
      "loss": 0.3255,
      "step": 4285
    },
    {
      "epoch": 0.1964,
      "grad_norm": 0.6634084582328796,
      "learning_rate": 6.084152429867113e-07,
      "loss": 0.3192,
      "step": 4286
    },
    {
      "epoch": 0.1966,
      "grad_norm": 0.6168258786201477,
      "learning_rate": 6.067475035183862e-07,
      "loss": 0.3342,
      "step": 4287
    },
    {
      "epoch": 0.1968,
      "grad_norm": 0.4496491849422455,
      "learning_rate": 6.050819052679585e-07,
      "loss": 0.3439,
      "step": 4288
    },
    {
      "epoch": 0.197,
      "grad_norm": 0.5891499519348145,
      "learning_rate": 6.034184490472195e-07,
      "loss": 0.3426,
      "step": 4289
    },
    {
      "epoch": 0.1972,
      "grad_norm": 0.3760116994380951,
      "learning_rate": 6.017571356669183e-07,
      "loss": 0.3482,
      "step": 4290
    },
    {
      "epoch": 0.1974,
      "grad_norm": 0.3706572651863098,
      "learning_rate": 6.000979659367579e-07,
      "loss": 0.2922,
      "step": 4291
    },
    {
      "epoch": 0.1976,
      "grad_norm": 0.4468158781528473,
      "learning_rate": 5.98440940665399e-07,
      "loss": 0.3316,
      "step": 4292
    },
    {
      "epoch": 0.1978,
      "grad_norm": 0.5071824789047241,
      "learning_rate": 5.967860606604553e-07,
      "loss": 0.3117,
      "step": 4293
    },
    {
      "epoch": 0.198,
      "grad_norm": 0.4274236261844635,
      "learning_rate": 5.951333267284942e-07,
      "loss": 0.3502,
      "step": 4294
    },
    {
      "epoch": 0.1982,
      "grad_norm": 0.43693265318870544,
      "learning_rate": 5.934827396750392e-07,
      "loss": 0.3693,
      "step": 4295
    },
    {
      "epoch": 0.1984,
      "grad_norm": 0.48027151823043823,
      "learning_rate": 5.918343003045656e-07,
      "loss": 0.3469,
      "step": 4296
    },
    {
      "epoch": 0.1986,
      "grad_norm": 0.4870007634162903,
      "learning_rate": 5.901880094205037e-07,
      "loss": 0.3524,
      "step": 4297
    },
    {
      "epoch": 0.1988,
      "grad_norm": 0.5042244791984558,
      "learning_rate": 5.885438678252342e-07,
      "loss": 0.3605,
      "step": 4298
    },
    {
      "epoch": 0.199,
      "grad_norm": 0.46256402134895325,
      "learning_rate": 5.869018763200929e-07,
      "loss": 0.3274,
      "step": 4299
    },
    {
      "epoch": 0.1992,
      "grad_norm": 0.5749567151069641,
      "learning_rate": 5.852620357053651e-07,
      "loss": 0.3027,
      "step": 4300
    },
    {
      "epoch": 0.1994,
      "grad_norm": 0.37867471575737,
      "learning_rate": 5.836243467802915e-07,
      "loss": 0.3113,
      "step": 4301
    },
    {
      "epoch": 0.1996,
      "grad_norm": 0.40484026074409485,
      "learning_rate": 5.819888103430598e-07,
      "loss": 0.3644,
      "step": 4302
    },
    {
      "epoch": 0.1998,
      "grad_norm": 0.38664183020591736,
      "learning_rate": 5.803554271908124e-07,
      "loss": 0.2953,
      "step": 4303
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.4543973505496979,
      "learning_rate": 5.787241981196384e-07,
      "loss": 0.3449,
      "step": 4304
    },
    {
      "epoch": 0.2002,
      "grad_norm": 0.40553709864616394,
      "learning_rate": 5.770951239245803e-07,
      "loss": 0.3018,
      "step": 4305
    },
    {
      "epoch": 0.2004,
      "grad_norm": 0.42019322514533997,
      "learning_rate": 5.754682053996291e-07,
      "loss": 0.3165,
      "step": 4306
    },
    {
      "epoch": 0.2006,
      "grad_norm": 0.41813796758651733,
      "learning_rate": 5.738434433377244e-07,
      "loss": 0.3322,
      "step": 4307
    },
    {
      "epoch": 0.2008,
      "grad_norm": 0.4440661668777466,
      "learning_rate": 5.722208385307559e-07,
      "loss": 0.3161,
      "step": 4308
    },
    {
      "epoch": 0.201,
      "grad_norm": 0.4680679738521576,
      "learning_rate": 5.706003917695619e-07,
      "loss": 0.3484,
      "step": 4309
    },
    {
      "epoch": 0.2012,
      "grad_norm": 0.4659935235977173,
      "learning_rate": 5.689821038439264e-07,
      "loss": 0.3242,
      "step": 4310
    },
    {
      "epoch": 0.2014,
      "grad_norm": 0.44912609457969666,
      "learning_rate": 5.673659755425859e-07,
      "loss": 0.3215,
      "step": 4311
    },
    {
      "epoch": 0.2016,
      "grad_norm": 0.49026915431022644,
      "learning_rate": 5.657520076532208e-07,
      "loss": 0.3302,
      "step": 4312
    },
    {
      "epoch": 0.2018,
      "grad_norm": 0.6117969155311584,
      "learning_rate": 5.641402009624591e-07,
      "loss": 0.32,
      "step": 4313
    },
    {
      "epoch": 0.202,
      "grad_norm": 0.6977234482765198,
      "learning_rate": 5.625305562558764e-07,
      "loss": 0.3095,
      "step": 4314
    },
    {
      "epoch": 0.2022,
      "grad_norm": 0.4388526678085327,
      "learning_rate": 5.609230743179939e-07,
      "loss": 0.3236,
      "step": 4315
    },
    {
      "epoch": 0.2024,
      "grad_norm": 0.4298993945121765,
      "learning_rate": 5.593177559322776e-07,
      "loss": 0.377,
      "step": 4316
    },
    {
      "epoch": 0.2026,
      "grad_norm": 0.6065451502799988,
      "learning_rate": 5.577146018811419e-07,
      "loss": 0.313,
      "step": 4317
    },
    {
      "epoch": 0.2028,
      "grad_norm": 0.4937712848186493,
      "learning_rate": 5.561136129459432e-07,
      "loss": 0.3296,
      "step": 4318
    },
    {
      "epoch": 0.203,
      "grad_norm": 0.4707382023334503,
      "learning_rate": 5.545147899069836e-07,
      "loss": 0.3025,
      "step": 4319
    },
    {
      "epoch": 0.2032,
      "grad_norm": 0.4551718235015869,
      "learning_rate": 5.529181335435124e-07,
      "loss": 0.2969,
      "step": 4320
    },
    {
      "epoch": 0.2034,
      "grad_norm": 0.40204402804374695,
      "learning_rate": 5.51323644633719e-07,
      "loss": 0.3083,
      "step": 4321
    },
    {
      "epoch": 0.2036,
      "grad_norm": 0.41730615496635437,
      "learning_rate": 5.497313239547374e-07,
      "loss": 0.3453,
      "step": 4322
    },
    {
      "epoch": 0.2038,
      "grad_norm": 0.43877631425857544,
      "learning_rate": 5.48141172282648e-07,
      "loss": 0.334,
      "step": 4323
    },
    {
      "epoch": 0.204,
      "grad_norm": 0.4810005724430084,
      "learning_rate": 5.46553190392467e-07,
      "loss": 0.3477,
      "step": 4324
    },
    {
      "epoch": 0.2042,
      "grad_norm": 0.48947155475616455,
      "learning_rate": 5.449673790581611e-07,
      "loss": 0.3348,
      "step": 4325
    },
    {
      "epoch": 0.2044,
      "grad_norm": 0.42113959789276123,
      "learning_rate": 5.433837390526341e-07,
      "loss": 0.3571,
      "step": 4326
    },
    {
      "epoch": 0.2046,
      "grad_norm": 0.6002999544143677,
      "learning_rate": 5.418022711477333e-07,
      "loss": 0.3431,
      "step": 4327
    },
    {
      "epoch": 0.2048,
      "grad_norm": 0.658279299736023,
      "learning_rate": 5.402229761142464e-07,
      "loss": 0.2952,
      "step": 4328
    },
    {
      "epoch": 0.205,
      "grad_norm": 0.42481720447540283,
      "learning_rate": 5.386458547219026e-07,
      "loss": 0.3291,
      "step": 4329
    },
    {
      "epoch": 0.2052,
      "grad_norm": 0.38538801670074463,
      "learning_rate": 5.370709077393721e-07,
      "loss": 0.3396,
      "step": 4330
    },
    {
      "epoch": 0.2054,
      "grad_norm": 0.5265108346939087,
      "learning_rate": 5.354981359342659e-07,
      "loss": 0.328,
      "step": 4331
    },
    {
      "epoch": 0.2056,
      "grad_norm": 0.465182900428772,
      "learning_rate": 5.339275400731331e-07,
      "loss": 0.3343,
      "step": 4332
    },
    {
      "epoch": 0.2058,
      "grad_norm": 0.4096553325653076,
      "learning_rate": 5.323591209214612e-07,
      "loss": 0.3212,
      "step": 4333
    },
    {
      "epoch": 0.206,
      "grad_norm": 0.42504778504371643,
      "learning_rate": 5.307928792436812e-07,
      "loss": 0.3214,
      "step": 4334
    },
    {
      "epoch": 0.2062,
      "grad_norm": 0.4309401214122772,
      "learning_rate": 5.292288158031595e-07,
      "loss": 0.3385,
      "step": 4335
    },
    {
      "epoch": 0.2064,
      "grad_norm": 0.4233052730560303,
      "learning_rate": 5.276669313622013e-07,
      "loss": 0.3592,
      "step": 4336
    },
    {
      "epoch": 0.2066,
      "grad_norm": 0.4732259213924408,
      "learning_rate": 5.2610722668205e-07,
      "loss": 0.3159,
      "step": 4337
    },
    {
      "epoch": 0.2068,
      "grad_norm": 0.3886144161224365,
      "learning_rate": 5.245497025228874e-07,
      "loss": 0.3273,
      "step": 4338
    },
    {
      "epoch": 0.207,
      "grad_norm": 0.4061303436756134,
      "learning_rate": 5.229943596438297e-07,
      "loss": 0.3284,
      "step": 4339
    },
    {
      "epoch": 0.2072,
      "grad_norm": 0.4649781286716461,
      "learning_rate": 5.214411988029355e-07,
      "loss": 0.3161,
      "step": 4340
    },
    {
      "epoch": 0.2074,
      "grad_norm": 0.4423719048500061,
      "learning_rate": 5.198902207571955e-07,
      "loss": 0.3333,
      "step": 4341
    },
    {
      "epoch": 0.2076,
      "grad_norm": 0.4336588382720947,
      "learning_rate": 5.183414262625364e-07,
      "loss": 0.3146,
      "step": 4342
    },
    {
      "epoch": 0.2078,
      "grad_norm": 0.43075352907180786,
      "learning_rate": 5.167948160738206e-07,
      "loss": 0.341,
      "step": 4343
    },
    {
      "epoch": 0.208,
      "grad_norm": 0.5537795424461365,
      "learning_rate": 5.152503909448503e-07,
      "loss": 0.346,
      "step": 4344
    },
    {
      "epoch": 0.2082,
      "grad_norm": 0.46690019965171814,
      "learning_rate": 5.137081516283582e-07,
      "loss": 0.3283,
      "step": 4345
    },
    {
      "epoch": 0.2084,
      "grad_norm": 0.4148690402507782,
      "learning_rate": 5.121680988760125e-07,
      "loss": 0.3238,
      "step": 4346
    },
    {
      "epoch": 0.2086,
      "grad_norm": 0.4630419611930847,
      "learning_rate": 5.106302334384172e-07,
      "loss": 0.328,
      "step": 4347
    },
    {
      "epoch": 0.2088,
      "grad_norm": 0.3979688882827759,
      "learning_rate": 5.090945560651073e-07,
      "loss": 0.3304,
      "step": 4348
    },
    {
      "epoch": 0.209,
      "grad_norm": 0.4452764391899109,
      "learning_rate": 5.075610675045567e-07,
      "loss": 0.3108,
      "step": 4349
    },
    {
      "epoch": 0.2092,
      "grad_norm": 0.4307324290275574,
      "learning_rate": 5.06029768504166e-07,
      "loss": 0.323,
      "step": 4350
    },
    {
      "epoch": 0.2094,
      "grad_norm": 0.3932841718196869,
      "learning_rate": 5.045006598102725e-07,
      "loss": 0.3284,
      "step": 4351
    },
    {
      "epoch": 0.2096,
      "grad_norm": 0.499598890542984,
      "learning_rate": 5.029737421681446e-07,
      "loss": 0.3233,
      "step": 4352
    },
    {
      "epoch": 0.2098,
      "grad_norm": 1.189171552658081,
      "learning_rate": 5.014490163219854e-07,
      "loss": 0.3273,
      "step": 4353
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0687042474746704,
      "learning_rate": 4.99926483014927e-07,
      "loss": 0.3644,
      "step": 4354
    },
    {
      "epoch": 0.2102,
      "grad_norm": 0.4478142261505127,
      "learning_rate": 4.984061429890324e-07,
      "loss": 0.3133,
      "step": 4355
    },
    {
      "epoch": 0.2104,
      "grad_norm": 0.4344960153102875,
      "learning_rate": 4.968879969852985e-07,
      "loss": 0.3367,
      "step": 4356
    },
    {
      "epoch": 0.2106,
      "grad_norm": 0.4610770046710968,
      "learning_rate": 4.9537204574365e-07,
      "loss": 0.3612,
      "step": 4357
    },
    {
      "epoch": 0.2108,
      "grad_norm": 0.4739937484264374,
      "learning_rate": 4.938582900029437e-07,
      "loss": 0.3519,
      "step": 4358
    },
    {
      "epoch": 0.211,
      "grad_norm": 0.6125321984291077,
      "learning_rate": 4.92346730500966e-07,
      "loss": 0.3451,
      "step": 4359
    },
    {
      "epoch": 0.2112,
      "grad_norm": 0.4559785723686218,
      "learning_rate": 4.908373679744316e-07,
      "loss": 0.3292,
      "step": 4360
    },
    {
      "epoch": 0.2114,
      "grad_norm": 0.45044979453086853,
      "learning_rate": 4.893302031589864e-07,
      "loss": 0.3297,
      "step": 4361
    },
    {
      "epoch": 0.2116,
      "grad_norm": 0.6850839257240295,
      "learning_rate": 4.878252367892033e-07,
      "loss": 0.357,
      "step": 4362
    },
    {
      "epoch": 0.2118,
      "grad_norm": 1.338188648223877,
      "learning_rate": 4.863224695985858e-07,
      "loss": 0.3423,
      "step": 4363
    },
    {
      "epoch": 0.212,
      "grad_norm": 0.4103778302669525,
      "learning_rate": 4.848219023195644e-07,
      "loss": 0.3435,
      "step": 4364
    },
    {
      "epoch": 0.2122,
      "grad_norm": 0.5914137959480286,
      "learning_rate": 4.833235356834959e-07,
      "loss": 0.3347,
      "step": 4365
    },
    {
      "epoch": 0.2124,
      "grad_norm": 0.3805255591869354,
      "learning_rate": 4.818273704206678e-07,
      "loss": 0.3214,
      "step": 4366
    },
    {
      "epoch": 0.2126,
      "grad_norm": 0.5288212299346924,
      "learning_rate": 4.803334072602917e-07,
      "loss": 0.349,
      "step": 4367
    },
    {
      "epoch": 0.2128,
      "grad_norm": 0.4510207772254944,
      "learning_rate": 4.788416469305068e-07,
      "loss": 0.3451,
      "step": 4368
    },
    {
      "epoch": 0.213,
      "grad_norm": 0.767156720161438,
      "learning_rate": 4.773520901583801e-07,
      "loss": 0.338,
      "step": 4369
    },
    {
      "epoch": 0.2132,
      "grad_norm": 0.8506453633308411,
      "learning_rate": 4.758647376699033e-07,
      "loss": 0.3914,
      "step": 4370
    },
    {
      "epoch": 0.2134,
      "grad_norm": 0.4463675320148468,
      "learning_rate": 4.743795901899928e-07,
      "loss": 0.3054,
      "step": 4371
    },
    {
      "epoch": 0.2136,
      "grad_norm": 0.5244629383087158,
      "learning_rate": 4.728966484424913e-07,
      "loss": 0.3356,
      "step": 4372
    },
    {
      "epoch": 0.2138,
      "grad_norm": 0.4583997130393982,
      "learning_rate": 4.714159131501689e-07,
      "loss": 0.303,
      "step": 4373
    },
    {
      "epoch": 0.214,
      "grad_norm": 0.6526700258255005,
      "learning_rate": 4.699373850347161e-07,
      "loss": 0.2973,
      "step": 4374
    },
    {
      "epoch": 0.2142,
      "grad_norm": 0.4636583626270294,
      "learning_rate": 4.6846106481675035e-07,
      "loss": 0.3246,
      "step": 4375
    },
    {
      "epoch": 0.2144,
      "grad_norm": 0.5247694849967957,
      "learning_rate": 4.6698695321581165e-07,
      "loss": 0.3568,
      "step": 4376
    },
    {
      "epoch": 0.2146,
      "grad_norm": 0.5101285576820374,
      "learning_rate": 4.655150509503642e-07,
      "loss": 0.3617,
      "step": 4377
    },
    {
      "epoch": 0.2148,
      "grad_norm": 0.5156652331352234,
      "learning_rate": 4.640453587377958e-07,
      "loss": 0.3389,
      "step": 4378
    },
    {
      "epoch": 0.215,
      "grad_norm": 0.5105535387992859,
      "learning_rate": 4.625778772944156e-07,
      "loss": 0.3276,
      "step": 4379
    },
    {
      "epoch": 0.2152,
      "grad_norm": 0.3470970094203949,
      "learning_rate": 4.6111260733545714e-07,
      "loss": 0.2942,
      "step": 4380
    },
    {
      "epoch": 0.2154,
      "grad_norm": 0.351947546005249,
      "learning_rate": 4.5964954957507414e-07,
      "loss": 0.3247,
      "step": 4381
    },
    {
      "epoch": 0.2156,
      "grad_norm": 0.37939006090164185,
      "learning_rate": 4.581887047263445e-07,
      "loss": 0.3028,
      "step": 4382
    },
    {
      "epoch": 0.2158,
      "grad_norm": 0.7641487121582031,
      "learning_rate": 4.567300735012653e-07,
      "loss": 0.3216,
      "step": 4383
    },
    {
      "epoch": 0.216,
      "grad_norm": 0.4992705285549164,
      "learning_rate": 4.552736566107563e-07,
      "loss": 0.325,
      "step": 4384
    },
    {
      "epoch": 0.2162,
      "grad_norm": 0.38424885272979736,
      "learning_rate": 4.538194547646574e-07,
      "loss": 0.3026,
      "step": 4385
    },
    {
      "epoch": 0.2164,
      "grad_norm": 0.408424437046051,
      "learning_rate": 4.523674686717283e-07,
      "loss": 0.3329,
      "step": 4386
    },
    {
      "epoch": 0.2166,
      "grad_norm": 0.491961270570755,
      "learning_rate": 4.5091769903964965e-07,
      "loss": 0.3276,
      "step": 4387
    },
    {
      "epoch": 0.2168,
      "grad_norm": 0.5393012166023254,
      "learning_rate": 4.494701465750217e-07,
      "loss": 0.3564,
      "step": 4388
    },
    {
      "epoch": 0.217,
      "grad_norm": 0.5320786833763123,
      "learning_rate": 4.480248119833641e-07,
      "loss": 0.3283,
      "step": 4389
    },
    {
      "epoch": 0.2172,
      "grad_norm": 0.5408861041069031,
      "learning_rate": 4.4658169596911493e-07,
      "loss": 0.3104,
      "step": 4390
    },
    {
      "epoch": 0.2174,
      "grad_norm": 0.4946640431880951,
      "learning_rate": 4.4514079923563103e-07,
      "loss": 0.307,
      "step": 4391
    },
    {
      "epoch": 0.2176,
      "grad_norm": 0.6231422424316406,
      "learning_rate": 4.4370212248518895e-07,
      "loss": 0.3474,
      "step": 4392
    },
    {
      "epoch": 0.2178,
      "grad_norm": 0.4205203354358673,
      "learning_rate": 4.4226566641898173e-07,
      "loss": 0.3597,
      "step": 4393
    },
    {
      "epoch": 0.218,
      "grad_norm": 0.4554661214351654,
      "learning_rate": 4.4083143173712207e-07,
      "loss": 0.3463,
      "step": 4394
    },
    {
      "epoch": 0.2182,
      "grad_norm": 0.566722571849823,
      "learning_rate": 4.3939941913863525e-07,
      "loss": 0.331,
      "step": 4395
    },
    {
      "epoch": 0.2184,
      "grad_norm": 0.4337329566478729,
      "learning_rate": 4.379696293214697e-07,
      "loss": 0.3131,
      "step": 4396
    },
    {
      "epoch": 0.2186,
      "grad_norm": 1.135631799697876,
      "learning_rate": 4.3654206298248625e-07,
      "loss": 0.3615,
      "step": 4397
    },
    {
      "epoch": 0.2188,
      "grad_norm": 0.4010556936264038,
      "learning_rate": 4.3511672081746393e-07,
      "loss": 0.3213,
      "step": 4398
    },
    {
      "epoch": 0.219,
      "grad_norm": 0.48953038454055786,
      "learning_rate": 4.33693603521097e-07,
      "loss": 0.3271,
      "step": 4399
    },
    {
      "epoch": 0.2192,
      "grad_norm": 0.47380971908569336,
      "learning_rate": 4.322727117869951e-07,
      "loss": 0.3039,
      "step": 4400
    },
    {
      "epoch": 0.2194,
      "grad_norm": 0.38879722356796265,
      "learning_rate": 4.308540463076849e-07,
      "loss": 0.3239,
      "step": 4401
    },
    {
      "epoch": 0.2196,
      "grad_norm": 0.42958173155784607,
      "learning_rate": 4.29437607774606e-07,
      "loss": 0.3311,
      "step": 4402
    },
    {
      "epoch": 0.2198,
      "grad_norm": 0.3948807716369629,
      "learning_rate": 4.280233968781139e-07,
      "loss": 0.3209,
      "step": 4403
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.48442891240119934,
      "learning_rate": 4.266114143074751e-07,
      "loss": 0.3365,
      "step": 4404
    },
    {
      "epoch": 0.2202,
      "grad_norm": 0.5008699893951416,
      "learning_rate": 4.2520166075087635e-07,
      "loss": 0.3312,
      "step": 4405
    },
    {
      "epoch": 0.2204,
      "grad_norm": 0.3896177411079407,
      "learning_rate": 4.237941368954124e-07,
      "loss": 0.3492,
      "step": 4406
    },
    {
      "epoch": 0.2206,
      "grad_norm": 0.5148847699165344,
      "learning_rate": 4.2238884342709397e-07,
      "loss": 0.3421,
      "step": 4407
    },
    {
      "epoch": 0.2208,
      "grad_norm": 0.5813378691673279,
      "learning_rate": 4.2098578103084376e-07,
      "loss": 0.3272,
      "step": 4408
    },
    {
      "epoch": 0.221,
      "grad_norm": 0.5260132551193237,
      "learning_rate": 4.195849503904975e-07,
      "loss": 0.3435,
      "step": 4409
    },
    {
      "epoch": 0.2212,
      "grad_norm": 0.4126540720462799,
      "learning_rate": 4.1818635218880186e-07,
      "loss": 0.3269,
      "step": 4410
    },
    {
      "epoch": 0.2214,
      "grad_norm": 0.5120560526847839,
      "learning_rate": 4.1678998710741936e-07,
      "loss": 0.3357,
      "step": 4411
    },
    {
      "epoch": 0.2216,
      "grad_norm": 1.237318515777588,
      "learning_rate": 4.153958558269189e-07,
      "loss": 0.3381,
      "step": 4412
    },
    {
      "epoch": 0.2218,
      "grad_norm": 0.49301713705062866,
      "learning_rate": 4.140039590267836e-07,
      "loss": 0.3209,
      "step": 4413
    },
    {
      "epoch": 0.222,
      "grad_norm": 0.46421313285827637,
      "learning_rate": 4.1261429738540694e-07,
      "loss": 0.2912,
      "step": 4414
    },
    {
      "epoch": 0.2222,
      "grad_norm": 0.671164870262146,
      "learning_rate": 4.112268715800943e-07,
      "loss": 0.3454,
      "step": 4415
    },
    {
      "epoch": 0.2224,
      "grad_norm": 0.4235536456108093,
      "learning_rate": 4.0984168228705934e-07,
      "loss": 0.314,
      "step": 4416
    },
    {
      "epoch": 0.2226,
      "grad_norm": 0.6809105277061462,
      "learning_rate": 4.084587301814269e-07,
      "loss": 0.3304,
      "step": 4417
    },
    {
      "epoch": 0.2228,
      "grad_norm": 0.5201569199562073,
      "learning_rate": 4.0707801593723006e-07,
      "loss": 0.3531,
      "step": 4418
    },
    {
      "epoch": 0.223,
      "grad_norm": 0.4367447793483734,
      "learning_rate": 4.056995402274122e-07,
      "loss": 0.3364,
      "step": 4419
    },
    {
      "epoch": 0.2232,
      "grad_norm": 0.46511608362197876,
      "learning_rate": 4.043233037238281e-07,
      "loss": 0.3356,
      "step": 4420
    },
    {
      "epoch": 0.2234,
      "grad_norm": 0.4808586835861206,
      "learning_rate": 4.029493070972362e-07,
      "loss": 0.3506,
      "step": 4421
    },
    {
      "epoch": 0.2236,
      "grad_norm": 0.42233264446258545,
      "learning_rate": 4.0157755101730645e-07,
      "loss": 0.3526,
      "step": 4422
    },
    {
      "epoch": 0.2238,
      "grad_norm": 0.518531084060669,
      "learning_rate": 4.002080361526156e-07,
      "loss": 0.3467,
      "step": 4423
    },
    {
      "epoch": 0.224,
      "grad_norm": 0.4586429297924042,
      "learning_rate": 3.9884076317064813e-07,
      "loss": 0.3119,
      "step": 4424
    },
    {
      "epoch": 0.2242,
      "grad_norm": 0.452260285615921,
      "learning_rate": 3.9747573273779816e-07,
      "loss": 0.3295,
      "step": 4425
    },
    {
      "epoch": 0.2244,
      "grad_norm": 0.38328954577445984,
      "learning_rate": 3.961129455193641e-07,
      "loss": 0.3246,
      "step": 4426
    },
    {
      "epoch": 0.2246,
      "grad_norm": 0.3949772119522095,
      "learning_rate": 3.947524021795518e-07,
      "loss": 0.3221,
      "step": 4427
    },
    {
      "epoch": 0.2248,
      "grad_norm": 0.5882042050361633,
      "learning_rate": 3.9339410338147363e-07,
      "loss": 0.3057,
      "step": 4428
    },
    {
      "epoch": 0.225,
      "grad_norm": 0.3929421603679657,
      "learning_rate": 3.920380497871473e-07,
      "loss": 0.3524,
      "step": 4429
    },
    {
      "epoch": 0.2252,
      "grad_norm": 0.4665968716144562,
      "learning_rate": 3.90684242057498e-07,
      "loss": 0.3194,
      "step": 4430
    },
    {
      "epoch": 0.2254,
      "grad_norm": 0.42601630091667175,
      "learning_rate": 3.89332680852354e-07,
      "loss": 0.3309,
      "step": 4431
    },
    {
      "epoch": 0.2256,
      "grad_norm": 1.0077860355377197,
      "learning_rate": 3.879833668304506e-07,
      "loss": 0.3373,
      "step": 4432
    },
    {
      "epoch": 0.2258,
      "grad_norm": 0.4411686360836029,
      "learning_rate": 3.866363006494256e-07,
      "loss": 0.3733,
      "step": 4433
    },
    {
      "epoch": 0.226,
      "grad_norm": 0.8758626580238342,
      "learning_rate": 3.85291482965825e-07,
      "loss": 0.3254,
      "step": 4434
    },
    {
      "epoch": 0.2262,
      "grad_norm": 0.407790869474411,
      "learning_rate": 3.8394891443509554e-07,
      "loss": 0.3262,
      "step": 4435
    },
    {
      "epoch": 0.2264,
      "grad_norm": 0.5080468654632568,
      "learning_rate": 3.8260859571158883e-07,
      "loss": 0.3376,
      "step": 4436
    },
    {
      "epoch": 0.2266,
      "grad_norm": 0.4621480405330658,
      "learning_rate": 3.812705274485595e-07,
      "loss": 0.3071,
      "step": 4437
    },
    {
      "epoch": 0.2268,
      "grad_norm": 1.136537790298462,
      "learning_rate": 3.7993471029816653e-07,
      "loss": 0.3512,
      "step": 4438
    },
    {
      "epoch": 0.227,
      "grad_norm": 0.4619442820549011,
      "learning_rate": 3.7860114491147017e-07,
      "loss": 0.3264,
      "step": 4439
    },
    {
      "epoch": 0.2272,
      "grad_norm": 0.5421351194381714,
      "learning_rate": 3.772698319384349e-07,
      "loss": 0.3305,
      "step": 4440
    },
    {
      "epoch": 0.2274,
      "grad_norm": 0.441523015499115,
      "learning_rate": 3.759407720279257e-07,
      "loss": 0.3356,
      "step": 4441
    },
    {
      "epoch": 0.2276,
      "grad_norm": 0.4838353395462036,
      "learning_rate": 3.7461396582771035e-07,
      "loss": 0.3334,
      "step": 4442
    },
    {
      "epoch": 0.2278,
      "grad_norm": 0.5130058526992798,
      "learning_rate": 3.732894139844578e-07,
      "loss": 0.3499,
      "step": 4443
    },
    {
      "epoch": 0.228,
      "grad_norm": 0.5121945738792419,
      "learning_rate": 3.7196711714373947e-07,
      "loss": 0.3216,
      "step": 4444
    },
    {
      "epoch": 0.2282,
      "grad_norm": 0.5355522036552429,
      "learning_rate": 3.7064707595002636e-07,
      "loss": 0.3214,
      "step": 4445
    },
    {
      "epoch": 0.2284,
      "grad_norm": 0.6326083540916443,
      "learning_rate": 3.693292910466906e-07,
      "loss": 0.3556,
      "step": 4446
    },
    {
      "epoch": 0.2286,
      "grad_norm": 0.38399451971054077,
      "learning_rate": 3.680137630760039e-07,
      "loss": 0.3117,
      "step": 4447
    },
    {
      "epoch": 0.2288,
      "grad_norm": 0.4648253321647644,
      "learning_rate": 3.6670049267913954e-07,
      "loss": 0.3261,
      "step": 4448
    },
    {
      "epoch": 0.229,
      "grad_norm": 0.4996013343334198,
      "learning_rate": 3.6538948049616886e-07,
      "loss": 0.3321,
      "step": 4449
    },
    {
      "epoch": 0.2292,
      "grad_norm": 0.39853543043136597,
      "learning_rate": 3.6408072716606346e-07,
      "loss": 0.3386,
      "step": 4450
    },
    {
      "epoch": 0.2294,
      "grad_norm": 0.6104252934455872,
      "learning_rate": 3.627742333266937e-07,
      "loss": 0.3412,
      "step": 4451
    },
    {
      "epoch": 0.2296,
      "grad_norm": 0.4444977641105652,
      "learning_rate": 3.614699996148285e-07,
      "loss": 0.3535,
      "step": 4452
    },
    {
      "epoch": 0.2298,
      "grad_norm": 0.43130865693092346,
      "learning_rate": 3.601680266661367e-07,
      "loss": 0.3471,
      "step": 4453
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.4212697744369507,
      "learning_rate": 3.5886831511518336e-07,
      "loss": 0.3345,
      "step": 4454
    },
    {
      "epoch": 0.2302,
      "grad_norm": 0.7497734427452087,
      "learning_rate": 3.575708655954324e-07,
      "loss": 0.3332,
      "step": 4455
    },
    {
      "epoch": 0.2304,
      "grad_norm": 0.4532374143600464,
      "learning_rate": 3.562756787392452e-07,
      "loss": 0.3381,
      "step": 4456
    },
    {
      "epoch": 0.2306,
      "grad_norm": 0.4286945164203644,
      "learning_rate": 3.5498275517787783e-07,
      "loss": 0.3142,
      "step": 4457
    },
    {
      "epoch": 0.2308,
      "grad_norm": 0.4383804500102997,
      "learning_rate": 3.5369209554148854e-07,
      "loss": 0.3417,
      "step": 4458
    },
    {
      "epoch": 0.231,
      "grad_norm": 0.4166756570339203,
      "learning_rate": 3.524037004591274e-07,
      "loss": 0.3339,
      "step": 4459
    },
    {
      "epoch": 0.2312,
      "grad_norm": 0.4536520838737488,
      "learning_rate": 3.511175705587433e-07,
      "loss": 0.3434,
      "step": 4460
    },
    {
      "epoch": 0.2314,
      "grad_norm": 0.4460821747779846,
      "learning_rate": 3.498337064671803e-07,
      "loss": 0.303,
      "step": 4461
    },
    {
      "epoch": 0.2316,
      "grad_norm": 0.4113999605178833,
      "learning_rate": 3.4855210881017675e-07,
      "loss": 0.3108,
      "step": 4462
    },
    {
      "epoch": 0.2318,
      "grad_norm": 0.44851773977279663,
      "learning_rate": 3.472727782123697e-07,
      "loss": 0.3392,
      "step": 4463
    },
    {
      "epoch": 0.232,
      "grad_norm": 0.4627659022808075,
      "learning_rate": 3.459957152972887e-07,
      "loss": 0.3384,
      "step": 4464
    },
    {
      "epoch": 0.2322,
      "grad_norm": 0.4026423692703247,
      "learning_rate": 3.4472092068735917e-07,
      "loss": 0.2971,
      "step": 4465
    },
    {
      "epoch": 0.2324,
      "grad_norm": 0.3579311966896057,
      "learning_rate": 3.434483950038986e-07,
      "loss": 0.2829,
      "step": 4466
    },
    {
      "epoch": 0.2326,
      "grad_norm": 0.43383321166038513,
      "learning_rate": 3.421781388671225e-07,
      "loss": 0.3204,
      "step": 4467
    },
    {
      "epoch": 0.2328,
      "grad_norm": 0.4241342842578888,
      "learning_rate": 3.409101528961378e-07,
      "loss": 0.3206,
      "step": 4468
    },
    {
      "epoch": 0.233,
      "grad_norm": 0.40078073740005493,
      "learning_rate": 3.396444377089453e-07,
      "loss": 0.3189,
      "step": 4469
    },
    {
      "epoch": 0.2332,
      "grad_norm": 0.589084804058075,
      "learning_rate": 3.3838099392243915e-07,
      "loss": 0.3637,
      "step": 4470
    },
    {
      "epoch": 0.2334,
      "grad_norm": 0.4129626154899597,
      "learning_rate": 3.371198221524069e-07,
      "loss": 0.344,
      "step": 4471
    },
    {
      "epoch": 0.2336,
      "grad_norm": 0.45879504084587097,
      "learning_rate": 3.358609230135268e-07,
      "loss": 0.3421,
      "step": 4472
    },
    {
      "epoch": 0.2338,
      "grad_norm": 0.3736560046672821,
      "learning_rate": 3.3460429711937417e-07,
      "loss": 0.3207,
      "step": 4473
    },
    {
      "epoch": 0.234,
      "grad_norm": 0.6075982451438904,
      "learning_rate": 3.3334994508241013e-07,
      "loss": 0.3423,
      "step": 4474
    },
    {
      "epoch": 0.2342,
      "grad_norm": 0.49828997254371643,
      "learning_rate": 3.320978675139919e-07,
      "loss": 0.327,
      "step": 4475
    },
    {
      "epoch": 0.2344,
      "grad_norm": 0.40580835938453674,
      "learning_rate": 3.3084806502436617e-07,
      "loss": 0.3028,
      "step": 4476
    },
    {
      "epoch": 0.2346,
      "grad_norm": 0.47967565059661865,
      "learning_rate": 3.2960053822267245e-07,
      "loss": 0.3509,
      "step": 4477
    },
    {
      "epoch": 0.2348,
      "grad_norm": 0.4229940176010132,
      "learning_rate": 3.283552877169399e-07,
      "loss": 0.3647,
      "step": 4478
    },
    {
      "epoch": 0.235,
      "grad_norm": 0.4271414875984192,
      "learning_rate": 3.271123141140886e-07,
      "loss": 0.3414,
      "step": 4479
    },
    {
      "epoch": 0.2352,
      "grad_norm": 0.623607873916626,
      "learning_rate": 3.258716180199278e-07,
      "loss": 0.3343,
      "step": 4480
    },
    {
      "epoch": 0.2354,
      "grad_norm": 0.4138796031475067,
      "learning_rate": 3.246332000391583e-07,
      "loss": 0.3271,
      "step": 4481
    },
    {
      "epoch": 0.2356,
      "grad_norm": 0.37094250321388245,
      "learning_rate": 3.233970607753717e-07,
      "loss": 0.2958,
      "step": 4482
    },
    {
      "epoch": 0.2358,
      "grad_norm": 0.5075497627258301,
      "learning_rate": 3.2216320083104434e-07,
      "loss": 0.337,
      "step": 4483
    },
    {
      "epoch": 0.236,
      "grad_norm": 0.39073652029037476,
      "learning_rate": 3.2093162080754634e-07,
      "loss": 0.3267,
      "step": 4484
    },
    {
      "epoch": 0.2362,
      "grad_norm": 0.4863068461418152,
      "learning_rate": 3.1970232130513365e-07,
      "loss": 0.3166,
      "step": 4485
    },
    {
      "epoch": 0.2364,
      "grad_norm": 0.37814003229141235,
      "learning_rate": 3.1847530292295313e-07,
      "loss": 0.3449,
      "step": 4486
    },
    {
      "epoch": 0.2366,
      "grad_norm": 0.4397203028202057,
      "learning_rate": 3.172505662590386e-07,
      "loss": 0.3224,
      "step": 4487
    },
    {
      "epoch": 0.2368,
      "grad_norm": 0.5313746929168701,
      "learning_rate": 3.160281119103109e-07,
      "loss": 0.3672,
      "step": 4488
    },
    {
      "epoch": 0.237,
      "grad_norm": 0.5483813285827637,
      "learning_rate": 3.148079404725801e-07,
      "loss": 0.3393,
      "step": 4489
    },
    {
      "epoch": 0.2372,
      "grad_norm": 0.4060356318950653,
      "learning_rate": 3.135900525405428e-07,
      "loss": 0.3236,
      "step": 4490
    },
    {
      "epoch": 0.2374,
      "grad_norm": 0.5196436047554016,
      "learning_rate": 3.123744487077829e-07,
      "loss": 0.3422,
      "step": 4491
    },
    {
      "epoch": 0.2376,
      "grad_norm": 0.45039504766464233,
      "learning_rate": 3.1116112956677045e-07,
      "loss": 0.3199,
      "step": 4492
    },
    {
      "epoch": 0.2378,
      "grad_norm": 1.0713049173355103,
      "learning_rate": 3.0995009570886305e-07,
      "loss": 0.3371,
      "step": 4493
    },
    {
      "epoch": 0.238,
      "grad_norm": 0.4703753590583801,
      "learning_rate": 3.0874134772430344e-07,
      "loss": 0.3103,
      "step": 4494
    },
    {
      "epoch": 0.2382,
      "grad_norm": 0.6932691335678101,
      "learning_rate": 3.0753488620222037e-07,
      "loss": 0.3367,
      "step": 4495
    },
    {
      "epoch": 0.2384,
      "grad_norm": 0.4789579212665558,
      "learning_rate": 3.0633071173062966e-07,
      "loss": 0.3381,
      "step": 4496
    },
    {
      "epoch": 0.2386,
      "grad_norm": 0.4185452163219452,
      "learning_rate": 3.051288248964307e-07,
      "loss": 0.3276,
      "step": 4497
    },
    {
      "epoch": 0.2388,
      "grad_norm": 0.5256708264350891,
      "learning_rate": 3.0392922628540875e-07,
      "loss": 0.3246,
      "step": 4498
    },
    {
      "epoch": 0.239,
      "grad_norm": 0.43065205216407776,
      "learning_rate": 3.027319164822329e-07,
      "loss": 0.3164,
      "step": 4499
    },
    {
      "epoch": 0.2392,
      "grad_norm": 0.8343338370323181,
      "learning_rate": 3.015368960704584e-07,
      "loss": 0.3246,
      "step": 4500
    },
    {
      "epoch": 0.2394,
      "grad_norm": 0.3699074387550354,
      "learning_rate": 3.003441656325229e-07,
      "loss": 0.2989,
      "step": 4501
    },
    {
      "epoch": 0.2396,
      "grad_norm": 0.474102646112442,
      "learning_rate": 2.99153725749749e-07,
      "loss": 0.3244,
      "step": 4502
    },
    {
      "epoch": 0.2398,
      "grad_norm": 0.4731607735157013,
      "learning_rate": 2.9796557700234317e-07,
      "loss": 0.3473,
      "step": 4503
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.44660839438438416,
      "learning_rate": 2.967797199693928e-07,
      "loss": 0.3146,
      "step": 4504
    },
    {
      "epoch": 0.2402,
      "grad_norm": 0.6244240403175354,
      "learning_rate": 2.9559615522887275e-07,
      "loss": 0.327,
      "step": 4505
    },
    {
      "epoch": 0.2404,
      "grad_norm": 0.41514018177986145,
      "learning_rate": 2.9441488335763656e-07,
      "loss": 0.3337,
      "step": 4506
    },
    {
      "epoch": 0.2406,
      "grad_norm": 0.5978337526321411,
      "learning_rate": 2.9323590493142206e-07,
      "loss": 0.3478,
      "step": 4507
    },
    {
      "epoch": 0.2408,
      "grad_norm": 0.3942466676235199,
      "learning_rate": 2.920592205248496e-07,
      "loss": 0.3288,
      "step": 4508
    },
    {
      "epoch": 0.241,
      "grad_norm": 0.5523399710655212,
      "learning_rate": 2.908848307114198e-07,
      "loss": 0.3354,
      "step": 4509
    },
    {
      "epoch": 0.2412,
      "grad_norm": 0.4257660508155823,
      "learning_rate": 2.8971273606351656e-07,
      "loss": 0.3537,
      "step": 4510
    },
    {
      "epoch": 0.2414,
      "grad_norm": 0.7530517578125,
      "learning_rate": 2.8854293715240455e-07,
      "loss": 0.3378,
      "step": 4511
    },
    {
      "epoch": 0.2416,
      "grad_norm": 0.5773714184761047,
      "learning_rate": 2.8737543454822993e-07,
      "loss": 0.3395,
      "step": 4512
    },
    {
      "epoch": 0.2418,
      "grad_norm": 1.000196099281311,
      "learning_rate": 2.862102288200186e-07,
      "loss": 0.3612,
      "step": 4513
    },
    {
      "epoch": 0.242,
      "grad_norm": 0.41246816515922546,
      "learning_rate": 2.850473205356774e-07,
      "loss": 0.3205,
      "step": 4514
    },
    {
      "epoch": 0.2422,
      "grad_norm": 0.47914189100265503,
      "learning_rate": 2.838867102619952e-07,
      "loss": 0.2961,
      "step": 4515
    },
    {
      "epoch": 0.2424,
      "grad_norm": 0.46578335762023926,
      "learning_rate": 2.8272839856463783e-07,
      "loss": 0.3656,
      "step": 4516
    },
    {
      "epoch": 0.2426,
      "grad_norm": 0.4475353956222534,
      "learning_rate": 2.815723860081537e-07,
      "loss": 0.3061,
      "step": 4517
    },
    {
      "epoch": 0.2428,
      "grad_norm": 0.5915449261665344,
      "learning_rate": 2.804186731559677e-07,
      "loss": 0.3447,
      "step": 4518
    },
    {
      "epoch": 0.243,
      "grad_norm": 0.516845166683197,
      "learning_rate": 2.792672605703867e-07,
      "loss": 0.333,
      "step": 4519
    },
    {
      "epoch": 0.2432,
      "grad_norm": 0.48860839009284973,
      "learning_rate": 2.7811814881259503e-07,
      "loss": 0.3516,
      "step": 4520
    },
    {
      "epoch": 0.2434,
      "grad_norm": 0.6101433038711548,
      "learning_rate": 2.7697133844265535e-07,
      "loss": 0.3279,
      "step": 4521
    },
    {
      "epoch": 0.2436,
      "grad_norm": 1.2879458665847778,
      "learning_rate": 2.758268300195094e-07,
      "loss": 0.3516,
      "step": 4522
    },
    {
      "epoch": 0.2438,
      "grad_norm": 0.40626317262649536,
      "learning_rate": 2.746846241009765e-07,
      "loss": 0.3443,
      "step": 4523
    },
    {
      "epoch": 0.244,
      "grad_norm": 0.4474978446960449,
      "learning_rate": 2.735447212437531e-07,
      "loss": 0.3377,
      "step": 4524
    },
    {
      "epoch": 0.2442,
      "grad_norm": 0.384938508272171,
      "learning_rate": 2.724071220034158e-07,
      "loss": 0.3333,
      "step": 4525
    },
    {
      "epoch": 0.2444,
      "grad_norm": 0.5129152536392212,
      "learning_rate": 2.712718269344161e-07,
      "loss": 0.3727,
      "step": 4526
    },
    {
      "epoch": 0.2446,
      "grad_norm": 0.401533305644989,
      "learning_rate": 2.701388365900831e-07,
      "loss": 0.3244,
      "step": 4527
    },
    {
      "epoch": 0.2448,
      "grad_norm": 0.4968706965446472,
      "learning_rate": 2.690081515226206e-07,
      "loss": 0.3448,
      "step": 4528
    },
    {
      "epoch": 0.245,
      "grad_norm": 0.4778997600078583,
      "learning_rate": 2.6787977228311336e-07,
      "loss": 0.3323,
      "step": 4529
    },
    {
      "epoch": 0.2452,
      "grad_norm": 0.5232058167457581,
      "learning_rate": 2.6675369942151864e-07,
      "loss": 0.3485,
      "step": 4530
    },
    {
      "epoch": 0.2454,
      "grad_norm": 0.462781697511673,
      "learning_rate": 2.656299334866702e-07,
      "loss": 0.3583,
      "step": 4531
    },
    {
      "epoch": 0.2456,
      "grad_norm": 0.4419201612472534,
      "learning_rate": 2.6450847502627883e-07,
      "loss": 0.3453,
      "step": 4532
    },
    {
      "epoch": 0.2458,
      "grad_norm": 0.46420690417289734,
      "learning_rate": 2.6338932458692847e-07,
      "loss": 0.3393,
      "step": 4533
    },
    {
      "epoch": 0.246,
      "grad_norm": 0.46088287234306335,
      "learning_rate": 2.622724827140816e-07,
      "loss": 0.3494,
      "step": 4534
    },
    {
      "epoch": 0.2462,
      "grad_norm": 0.44033700227737427,
      "learning_rate": 2.611579499520722e-07,
      "loss": 0.337,
      "step": 4535
    },
    {
      "epoch": 0.2464,
      "grad_norm": 0.513659656047821,
      "learning_rate": 2.600457268441092e-07,
      "loss": 0.3389,
      "step": 4536
    },
    {
      "epoch": 0.2466,
      "grad_norm": 0.4440673589706421,
      "learning_rate": 2.589358139322767e-07,
      "loss": 0.2918,
      "step": 4537
    },
    {
      "epoch": 0.2468,
      "grad_norm": 0.41392210125923157,
      "learning_rate": 2.578282117575343e-07,
      "loss": 0.3453,
      "step": 4538
    },
    {
      "epoch": 0.247,
      "grad_norm": 0.5801279544830322,
      "learning_rate": 2.5672292085971276e-07,
      "loss": 0.3151,
      "step": 4539
    },
    {
      "epoch": 0.2472,
      "grad_norm": 0.7148948311805725,
      "learning_rate": 2.556199417775174e-07,
      "loss": 0.349,
      "step": 4540
    },
    {
      "epoch": 0.2474,
      "grad_norm": 0.4325096607208252,
      "learning_rate": 2.5451927504852757e-07,
      "loss": 0.3145,
      "step": 4541
    },
    {
      "epoch": 0.2476,
      "grad_norm": 0.3731968104839325,
      "learning_rate": 2.534209212091937e-07,
      "loss": 0.3468,
      "step": 4542
    },
    {
      "epoch": 0.2478,
      "grad_norm": 0.43166473507881165,
      "learning_rate": 2.523248807948403e-07,
      "loss": 0.3177,
      "step": 4543
    },
    {
      "epoch": 0.248,
      "grad_norm": 0.394045352935791,
      "learning_rate": 2.5123115433966615e-07,
      "loss": 0.3285,
      "step": 4544
    },
    {
      "epoch": 0.2482,
      "grad_norm": 0.5032169222831726,
      "learning_rate": 2.5013974237673824e-07,
      "loss": 0.3824,
      "step": 4545
    },
    {
      "epoch": 0.2484,
      "grad_norm": 0.5567966103553772,
      "learning_rate": 2.4905064543799706e-07,
      "loss": 0.3583,
      "step": 4546
    },
    {
      "epoch": 0.2486,
      "grad_norm": 0.47077447175979614,
      "learning_rate": 2.479638640542564e-07,
      "loss": 0.3621,
      "step": 4547
    },
    {
      "epoch": 0.2488,
      "grad_norm": 0.5163447260856628,
      "learning_rate": 2.4687939875519984e-07,
      "loss": 0.3228,
      "step": 4548
    },
    {
      "epoch": 0.249,
      "grad_norm": 0.5827368497848511,
      "learning_rate": 2.457972500693834e-07,
      "loss": 0.3711,
      "step": 4549
    },
    {
      "epoch": 0.2492,
      "grad_norm": 0.46567589044570923,
      "learning_rate": 2.447174185242324e-07,
      "loss": 0.3172,
      "step": 4550
    },
    {
      "epoch": 0.2494,
      "grad_norm": 0.4273761808872223,
      "learning_rate": 2.4363990464604357e-07,
      "loss": 0.3333,
      "step": 4551
    },
    {
      "epoch": 0.2496,
      "grad_norm": 1.0894922018051147,
      "learning_rate": 2.4256470895998363e-07,
      "loss": 0.3206,
      "step": 4552
    },
    {
      "epoch": 0.2498,
      "grad_norm": 0.38590314984321594,
      "learning_rate": 2.414918319900922e-07,
      "loss": 0.3059,
      "step": 4553
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.44362589716911316,
      "learning_rate": 2.404212742592743e-07,
      "loss": 0.3043,
      "step": 4554
    },
    {
      "epoch": 0.2502,
      "grad_norm": 0.5412023663520813,
      "learning_rate": 2.3935303628930705e-07,
      "loss": 0.3216,
      "step": 4555
    },
    {
      "epoch": 0.2504,
      "grad_norm": 0.49267444014549255,
      "learning_rate": 2.3828711860083676e-07,
      "loss": 0.3109,
      "step": 4556
    },
    {
      "epoch": 0.2506,
      "grad_norm": 0.4988182485103607,
      "learning_rate": 2.3722352171337836e-07,
      "loss": 0.359,
      "step": 4557
    },
    {
      "epoch": 0.2508,
      "grad_norm": 0.42428240180015564,
      "learning_rate": 2.361622461453178e-07,
      "loss": 0.3329,
      "step": 4558
    },
    {
      "epoch": 0.251,
      "grad_norm": 0.4043004512786865,
      "learning_rate": 2.351032924139063e-07,
      "loss": 0.3387,
      "step": 4559
    },
    {
      "epoch": 0.2512,
      "grad_norm": 0.4972406327724457,
      "learning_rate": 2.3404666103526542e-07,
      "loss": 0.3351,
      "step": 4560
    },
    {
      "epoch": 0.2514,
      "grad_norm": 0.511881411075592,
      "learning_rate": 2.3299235252438434e-07,
      "loss": 0.3079,
      "step": 4561
    },
    {
      "epoch": 0.2516,
      "grad_norm": 0.3963022530078888,
      "learning_rate": 2.319403673951204e-07,
      "loss": 0.323,
      "step": 4562
    },
    {
      "epoch": 0.2518,
      "grad_norm": 0.49198639392852783,
      "learning_rate": 2.3089070616019838e-07,
      "loss": 0.3453,
      "step": 4563
    },
    {
      "epoch": 0.252,
      "grad_norm": 0.5566471815109253,
      "learning_rate": 2.2984336933121076e-07,
      "loss": 0.3104,
      "step": 4564
    },
    {
      "epoch": 0.2522,
      "grad_norm": 0.41121843457221985,
      "learning_rate": 2.287983574186159e-07,
      "loss": 0.3205,
      "step": 4565
    },
    {
      "epoch": 0.2524,
      "grad_norm": 0.43003764748573303,
      "learning_rate": 2.2775567093174022e-07,
      "loss": 0.352,
      "step": 4566
    },
    {
      "epoch": 0.2526,
      "grad_norm": 0.5312823057174683,
      "learning_rate": 2.2671531037877724e-07,
      "loss": 0.3367,
      "step": 4567
    },
    {
      "epoch": 0.2528,
      "grad_norm": 0.38273727893829346,
      "learning_rate": 2.2567727626678527e-07,
      "loss": 0.3031,
      "step": 4568
    },
    {
      "epoch": 0.253,
      "grad_norm": 0.4936683475971222,
      "learning_rate": 2.2464156910168954e-07,
      "loss": 0.3408,
      "step": 4569
    },
    {
      "epoch": 0.2532,
      "grad_norm": 0.40623095631599426,
      "learning_rate": 2.2360818938828189e-07,
      "loss": 0.322,
      "step": 4570
    },
    {
      "epoch": 0.2534,
      "grad_norm": 0.43159884214401245,
      "learning_rate": 2.2257713763021826e-07,
      "loss": 0.3455,
      "step": 4571
    },
    {
      "epoch": 0.2536,
      "grad_norm": 0.5283619165420532,
      "learning_rate": 2.2154841433002062e-07,
      "loss": 0.3579,
      "step": 4572
    },
    {
      "epoch": 0.2538,
      "grad_norm": 0.5397290587425232,
      "learning_rate": 2.2052201998907673e-07,
      "loss": 0.3189,
      "step": 4573
    },
    {
      "epoch": 0.254,
      "grad_norm": 0.5157825350761414,
      "learning_rate": 2.1949795510763872e-07,
      "loss": 0.3658,
      "step": 4574
    },
    {
      "epoch": 0.2542,
      "grad_norm": 0.34151336550712585,
      "learning_rate": 2.1847622018482283e-07,
      "loss": 0.318,
      "step": 4575
    },
    {
      "epoch": 0.2544,
      "grad_norm": 0.5159252285957336,
      "learning_rate": 2.174568157186102e-07,
      "loss": 0.3408,
      "step": 4576
    },
    {
      "epoch": 0.2546,
      "grad_norm": 0.6913192868232727,
      "learning_rate": 2.1643974220584729e-07,
      "loss": 0.3775,
      "step": 4577
    },
    {
      "epoch": 0.2548,
      "grad_norm": 0.4009915292263031,
      "learning_rate": 2.154250001422431e-07,
      "loss": 0.3023,
      "step": 4578
    },
    {
      "epoch": 0.255,
      "grad_norm": 0.43631431460380554,
      "learning_rate": 2.1441259002236924e-07,
      "loss": 0.3352,
      "step": 4579
    },
    {
      "epoch": 0.2552,
      "grad_norm": 0.4092567265033722,
      "learning_rate": 2.134025123396638e-07,
      "loss": 0.2894,
      "step": 4580
    },
    {
      "epoch": 0.2554,
      "grad_norm": 0.5176042914390564,
      "learning_rate": 2.123947675864252e-07,
      "loss": 0.3531,
      "step": 4581
    },
    {
      "epoch": 0.2556,
      "grad_norm": 0.5359839797019958,
      "learning_rate": 2.1138935625381663e-07,
      "loss": 0.3059,
      "step": 4582
    },
    {
      "epoch": 0.2558,
      "grad_norm": 0.3736419379711151,
      "learning_rate": 2.103862788318628e-07,
      "loss": 0.3103,
      "step": 4583
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.46227291226387024,
      "learning_rate": 2.0938553580945208e-07,
      "loss": 0.3335,
      "step": 4584
    },
    {
      "epoch": 0.2562,
      "grad_norm": 0.4781275689601898,
      "learning_rate": 2.083871276743338e-07,
      "loss": 0.315,
      "step": 4585
    },
    {
      "epoch": 0.2564,
      "grad_norm": 0.5772593021392822,
      "learning_rate": 2.0739105491312028e-07,
      "loss": 0.3337,
      "step": 4586
    },
    {
      "epoch": 0.2566,
      "grad_norm": 0.44926464557647705,
      "learning_rate": 2.0639731801128603e-07,
      "loss": 0.3568,
      "step": 4587
    },
    {
      "epoch": 0.2568,
      "grad_norm": 0.5409942269325256,
      "learning_rate": 2.054059174531653e-07,
      "loss": 0.3448,
      "step": 4588
    },
    {
      "epoch": 0.257,
      "grad_norm": 0.5060105323791504,
      "learning_rate": 2.0441685372195487e-07,
      "loss": 0.3096,
      "step": 4589
    },
    {
      "epoch": 0.2572,
      "grad_norm": 0.4632422924041748,
      "learning_rate": 2.0343012729971244e-07,
      "loss": 0.3198,
      "step": 4590
    },
    {
      "epoch": 0.2574,
      "grad_norm": 0.4585506319999695,
      "learning_rate": 2.0244573866735673e-07,
      "loss": 0.3305,
      "step": 4591
    },
    {
      "epoch": 0.2576,
      "grad_norm": 0.6556222438812256,
      "learning_rate": 2.0146368830466668e-07,
      "loss": 0.3453,
      "step": 4592
    },
    {
      "epoch": 0.2578,
      "grad_norm": 0.3919202983379364,
      "learning_rate": 2.0048397669028164e-07,
      "loss": 0.3199,
      "step": 4593
    },
    {
      "epoch": 0.258,
      "grad_norm": 0.48281994462013245,
      "learning_rate": 1.995066043017013e-07,
      "loss": 0.3493,
      "step": 4594
    },
    {
      "epoch": 0.2582,
      "grad_norm": 0.3967198431491852,
      "learning_rate": 1.9853157161528468e-07,
      "loss": 0.3106,
      "step": 4595
    },
    {
      "epoch": 0.2584,
      "grad_norm": 0.40724194049835205,
      "learning_rate": 1.9755887910625103e-07,
      "loss": 0.3161,
      "step": 4596
    },
    {
      "epoch": 0.2586,
      "grad_norm": 0.4735030233860016,
      "learning_rate": 1.9658852724868005e-07,
      "loss": 0.3008,
      "step": 4597
    },
    {
      "epoch": 0.2588,
      "grad_norm": 0.3832450807094574,
      "learning_rate": 1.9562051651550784e-07,
      "loss": 0.3225,
      "step": 4598
    },
    {
      "epoch": 0.259,
      "grad_norm": 0.5697482228279114,
      "learning_rate": 1.9465484737853092e-07,
      "loss": 0.3738,
      "step": 4599
    },
    {
      "epoch": 0.2592,
      "grad_norm": 0.5669687390327454,
      "learning_rate": 1.9369152030840553e-07,
      "loss": 0.3311,
      "step": 4600
    },
    {
      "epoch": 0.2594,
      "grad_norm": 0.4603635370731354,
      "learning_rate": 1.927305357746462e-07,
      "loss": 0.381,
      "step": 4601
    },
    {
      "epoch": 0.2596,
      "grad_norm": 0.49179089069366455,
      "learning_rate": 1.917718942456237e-07,
      "loss": 0.3242,
      "step": 4602
    },
    {
      "epoch": 0.2598,
      "grad_norm": 0.4239453375339508,
      "learning_rate": 1.9081559618856938e-07,
      "loss": 0.3589,
      "step": 4603
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5426622629165649,
      "learning_rate": 1.8986164206957037e-07,
      "loss": 0.3317,
      "step": 4604
    },
    {
      "epoch": 0.2602,
      "grad_norm": 0.38584718108177185,
      "learning_rate": 1.8891003235357307e-07,
      "loss": 0.3285,
      "step": 4605
    },
    {
      "epoch": 0.2604,
      "grad_norm": 0.3590281903743744,
      "learning_rate": 1.8796076750438096e-07,
      "loss": 0.3096,
      "step": 4606
    },
    {
      "epoch": 0.2606,
      "grad_norm": 0.42349880933761597,
      "learning_rate": 1.8701384798465284e-07,
      "loss": 0.3084,
      "step": 4607
    },
    {
      "epoch": 0.2608,
      "grad_norm": 0.45431721210479736,
      "learning_rate": 1.8606927425590616e-07,
      "loss": 0.3465,
      "step": 4608
    },
    {
      "epoch": 0.261,
      "grad_norm": 0.4601645767688751,
      "learning_rate": 1.8512704677851489e-07,
      "loss": 0.3397,
      "step": 4609
    },
    {
      "epoch": 0.2612,
      "grad_norm": 0.4158497750759125,
      "learning_rate": 1.841871660117095e-07,
      "loss": 0.3358,
      "step": 4610
    },
    {
      "epoch": 0.2614,
      "grad_norm": 0.5016064047813416,
      "learning_rate": 1.832496324135763e-07,
      "loss": 0.3249,
      "step": 4611
    },
    {
      "epoch": 0.2616,
      "grad_norm": 0.5591458678245544,
      "learning_rate": 1.8231444644105755e-07,
      "loss": 0.3291,
      "step": 4612
    },
    {
      "epoch": 0.2618,
      "grad_norm": 0.4571053981781006,
      "learning_rate": 1.8138160854995145e-07,
      "loss": 0.3262,
      "step": 4613
    },
    {
      "epoch": 0.262,
      "grad_norm": 0.5735933184623718,
      "learning_rate": 1.804511191949121e-07,
      "loss": 0.3288,
      "step": 4614
    },
    {
      "epoch": 0.2622,
      "grad_norm": 0.7203124761581421,
      "learning_rate": 1.7952297882945e-07,
      "loss": 0.3338,
      "step": 4615
    },
    {
      "epoch": 0.2624,
      "grad_norm": 0.4217219650745392,
      "learning_rate": 1.785971879059273e-07,
      "loss": 0.3099,
      "step": 4616
    },
    {
      "epoch": 0.2626,
      "grad_norm": 0.56905597448349,
      "learning_rate": 1.7767374687556405e-07,
      "loss": 0.336,
      "step": 4617
    },
    {
      "epoch": 0.2628,
      "grad_norm": 0.38447412848472595,
      "learning_rate": 1.7675265618843361e-07,
      "loss": 0.3225,
      "step": 4618
    },
    {
      "epoch": 0.263,
      "grad_norm": 0.5750580430030823,
      "learning_rate": 1.758339162934658e-07,
      "loss": 0.3448,
      "step": 4619
    },
    {
      "epoch": 0.2632,
      "grad_norm": 0.6049262881278992,
      "learning_rate": 1.7491752763844294e-07,
      "loss": 0.3373,
      "step": 4620
    },
    {
      "epoch": 0.2634,
      "grad_norm": 0.47291141748428345,
      "learning_rate": 1.740034906700011e-07,
      "loss": 0.3128,
      "step": 4621
    },
    {
      "epoch": 0.2636,
      "grad_norm": 0.41986581683158875,
      "learning_rate": 1.7309180583363062e-07,
      "loss": 0.336,
      "step": 4622
    },
    {
      "epoch": 0.2638,
      "grad_norm": 0.41180673241615295,
      "learning_rate": 1.7218247357367656e-07,
      "loss": 0.3405,
      "step": 4623
    },
    {
      "epoch": 0.264,
      "grad_norm": 0.3336499333381653,
      "learning_rate": 1.7127549433333557e-07,
      "loss": 0.3199,
      "step": 4624
    },
    {
      "epoch": 0.2642,
      "grad_norm": 0.4570178687572479,
      "learning_rate": 1.7037086855465902e-07,
      "loss": 0.3952,
      "step": 4625
    },
    {
      "epoch": 0.2644,
      "grad_norm": 0.45571714639663696,
      "learning_rate": 1.6946859667854977e-07,
      "loss": 0.3425,
      "step": 4626
    },
    {
      "epoch": 0.2646,
      "grad_norm": 0.4589521884918213,
      "learning_rate": 1.6856867914476492e-07,
      "loss": 0.3329,
      "step": 4627
    },
    {
      "epoch": 0.2648,
      "grad_norm": 0.49321138858795166,
      "learning_rate": 1.6767111639191202e-07,
      "loss": 0.3383,
      "step": 4628
    },
    {
      "epoch": 0.265,
      "grad_norm": 0.4606033265590668,
      "learning_rate": 1.6677590885745388e-07,
      "loss": 0.3284,
      "step": 4629
    },
    {
      "epoch": 0.2652,
      "grad_norm": 0.4686354994773865,
      "learning_rate": 1.6588305697770313e-07,
      "loss": 0.3316,
      "step": 4630
    },
    {
      "epoch": 0.2654,
      "grad_norm": 0.5442278981208801,
      "learning_rate": 1.6499256118782503e-07,
      "loss": 0.3432,
      "step": 4631
    },
    {
      "epoch": 0.2656,
      "grad_norm": 0.4843980669975281,
      "learning_rate": 1.6410442192183574e-07,
      "loss": 0.3308,
      "step": 4632
    },
    {
      "epoch": 0.2658,
      "grad_norm": 0.43763965368270874,
      "learning_rate": 1.6321863961260452e-07,
      "loss": 0.3542,
      "step": 4633
    },
    {
      "epoch": 0.266,
      "grad_norm": 0.40040361881256104,
      "learning_rate": 1.6233521469185054e-07,
      "loss": 0.3528,
      "step": 4634
    },
    {
      "epoch": 0.2662,
      "grad_norm": 0.3958882987499237,
      "learning_rate": 1.6145414759014433e-07,
      "loss": 0.3275,
      "step": 4635
    },
    {
      "epoch": 0.2664,
      "grad_norm": 0.4411078691482544,
      "learning_rate": 1.6057543873690685e-07,
      "loss": 0.3516,
      "step": 4636
    },
    {
      "epoch": 0.2666,
      "grad_norm": 0.4190390706062317,
      "learning_rate": 1.596990885604105e-07,
      "loss": 0.2993,
      "step": 4637
    },
    {
      "epoch": 0.2668,
      "grad_norm": 0.7191802859306335,
      "learning_rate": 1.5882509748777809e-07,
      "loss": 0.3262,
      "step": 4638
    },
    {
      "epoch": 0.267,
      "grad_norm": 0.4540862739086151,
      "learning_rate": 1.5795346594498162e-07,
      "loss": 0.3253,
      "step": 4639
    },
    {
      "epoch": 0.2672,
      "grad_norm": 0.391668438911438,
      "learning_rate": 1.5708419435684463e-07,
      "loss": 0.3004,
      "step": 4640
    },
    {
      "epoch": 0.2674,
      "grad_norm": 0.40534883737564087,
      "learning_rate": 1.5621728314703822e-07,
      "loss": 0.3443,
      "step": 4641
    },
    {
      "epoch": 0.2676,
      "grad_norm": 0.39316344261169434,
      "learning_rate": 1.553527327380855e-07,
      "loss": 0.3166,
      "step": 4642
    },
    {
      "epoch": 0.2678,
      "grad_norm": 0.5514549016952515,
      "learning_rate": 1.5449054355135718e-07,
      "loss": 0.3225,
      "step": 4643
    },
    {
      "epoch": 0.268,
      "grad_norm": 0.5075416564941406,
      "learning_rate": 1.5363071600707435e-07,
      "loss": 0.3488,
      "step": 4644
    },
    {
      "epoch": 0.2682,
      "grad_norm": 0.47598201036453247,
      "learning_rate": 1.5277325052430569e-07,
      "loss": 0.3292,
      "step": 4645
    },
    {
      "epoch": 0.2684,
      "grad_norm": 0.471495658159256,
      "learning_rate": 1.5191814752097024e-07,
      "loss": 0.3009,
      "step": 4646
    },
    {
      "epoch": 0.2686,
      "grad_norm": 0.3968542814254761,
      "learning_rate": 1.5106540741383402e-07,
      "loss": 0.3319,
      "step": 4647
    },
    {
      "epoch": 0.2688,
      "grad_norm": 0.41790324449539185,
      "learning_rate": 1.502150306185135e-07,
      "loss": 0.3473,
      "step": 4648
    },
    {
      "epoch": 0.269,
      "grad_norm": 0.5459285974502563,
      "learning_rate": 1.4936701754947104e-07,
      "loss": 0.3109,
      "step": 4649
    },
    {
      "epoch": 0.2692,
      "grad_norm": 0.39984938502311707,
      "learning_rate": 1.4852136862001766e-07,
      "loss": 0.3147,
      "step": 4650
    },
    {
      "epoch": 0.2694,
      "grad_norm": 0.41624096035957336,
      "learning_rate": 1.4767808424231312e-07,
      "loss": 0.327,
      "step": 4651
    },
    {
      "epoch": 0.2696,
      "grad_norm": 0.42964476346969604,
      "learning_rate": 1.4683716482736364e-07,
      "loss": 0.325,
      "step": 4652
    },
    {
      "epoch": 0.2698,
      "grad_norm": 0.5121008157730103,
      "learning_rate": 1.459986107850231e-07,
      "loss": 0.3275,
      "step": 4653
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.44055068492889404,
      "learning_rate": 1.4516242252399227e-07,
      "loss": 0.3002,
      "step": 4654
    },
    {
      "epoch": 0.2702,
      "grad_norm": 0.4205078184604645,
      "learning_rate": 1.4432860045182019e-07,
      "loss": 0.338,
      "step": 4655
    },
    {
      "epoch": 0.0002,
      "grad_norm": 0.39251989126205444,
      "learning_rate": 1.4349714497490009e-07,
      "loss": 0.3178,
      "step": 4656
    },
    {
      "epoch": 0.0004,
      "grad_norm": 0.5698876976966858,
      "learning_rate": 1.4266805649847392e-07,
      "loss": 0.3555,
      "step": 4657
    },
    {
      "epoch": 0.0006,
      "grad_norm": 0.46827054023742676,
      "learning_rate": 1.4184133542663014e-07,
      "loss": 0.3428,
      "step": 4658
    },
    {
      "epoch": 0.0008,
      "grad_norm": 0.36794552206993103,
      "learning_rate": 1.4101698216230254e-07,
      "loss": 0.3106,
      "step": 4659
    },
    {
      "epoch": 0.001,
      "grad_norm": 0.5487066507339478,
      "learning_rate": 1.4019499710726913e-07,
      "loss": 0.3575,
      "step": 4660
    },
    {
      "epoch": 0.0012,
      "grad_norm": 0.4132021367549896,
      "learning_rate": 1.3937538066215672e-07,
      "loss": 0.3494,
      "step": 4661
    },
    {
      "epoch": 0.0014,
      "grad_norm": 0.370064377784729,
      "learning_rate": 1.385581332264363e-07,
      "loss": 0.2968,
      "step": 4662
    },
    {
      "epoch": 0.0016,
      "grad_norm": 0.5368125438690186,
      "learning_rate": 1.3774325519842423e-07,
      "loss": 0.3087,
      "step": 4663
    },
    {
      "epoch": 0.0018,
      "grad_norm": 0.4308474361896515,
      "learning_rate": 1.3693074697528231e-07,
      "loss": 0.3446,
      "step": 4664
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.6067838668823242,
      "learning_rate": 1.3612060895301759e-07,
      "loss": 0.3445,
      "step": 4665
    },
    {
      "epoch": 0.0022,
      "grad_norm": 0.8107783794403076,
      "learning_rate": 1.3531284152647983e-07,
      "loss": 0.3251,
      "step": 4666
    },
    {
      "epoch": 0.0024,
      "grad_norm": 0.6639647483825684,
      "learning_rate": 1.3450744508936687e-07,
      "loss": 0.3179,
      "step": 4667
    },
    {
      "epoch": 0.0026,
      "grad_norm": 0.41531801223754883,
      "learning_rate": 1.3370442003421913e-07,
      "loss": 0.3275,
      "step": 4668
    },
    {
      "epoch": 0.0028,
      "grad_norm": 0.42833778262138367,
      "learning_rate": 1.3290376675242022e-07,
      "loss": 0.3354,
      "step": 4669
    },
    {
      "epoch": 0.003,
      "grad_norm": 0.48170650005340576,
      "learning_rate": 1.3210548563419857e-07,
      "loss": 0.343,
      "step": 4670
    },
    {
      "epoch": 0.0032,
      "grad_norm": 0.6341383457183838,
      "learning_rate": 1.313095770686279e-07,
      "loss": 0.3859,
      "step": 4671
    },
    {
      "epoch": 0.0034,
      "grad_norm": 0.48735761642456055,
      "learning_rate": 1.3051604144362407e-07,
      "loss": 0.3303,
      "step": 4672
    },
    {
      "epoch": 0.0036,
      "grad_norm": 0.5222074389457703,
      "learning_rate": 1.29724879145946e-07,
      "loss": 0.3276,
      "step": 4673
    },
    {
      "epoch": 0.0038,
      "grad_norm": 0.3866554796695709,
      "learning_rate": 1.289360905611975e-07,
      "loss": 0.3438,
      "step": 4674
    },
    {
      "epoch": 0.004,
      "grad_norm": 0.43537455797195435,
      "learning_rate": 1.2814967607382433e-07,
      "loss": 0.3033,
      "step": 4675
    },
    {
      "epoch": 0.0042,
      "grad_norm": 0.4386952519416809,
      "learning_rate": 1.2736563606711384e-07,
      "loss": 0.3243,
      "step": 4676
    },
    {
      "epoch": 0.0044,
      "grad_norm": 0.454937607049942,
      "learning_rate": 1.2658397092320028e-07,
      "loss": 0.3134,
      "step": 4677
    },
    {
      "epoch": 0.0046,
      "grad_norm": 0.399565726518631,
      "learning_rate": 1.258046810230562e-07,
      "loss": 0.3208,
      "step": 4678
    },
    {
      "epoch": 0.0048,
      "grad_norm": 0.4182375967502594,
      "learning_rate": 1.2502776674649776e-07,
      "loss": 0.3389,
      "step": 4679
    },
    {
      "epoch": 0.005,
      "grad_norm": 0.5035279989242554,
      "learning_rate": 1.2425322847218368e-07,
      "loss": 0.3284,
      "step": 4680
    },
    {
      "epoch": 0.0052,
      "grad_norm": 0.4264755845069885,
      "learning_rate": 1.2348106657761537e-07,
      "loss": 0.3302,
      "step": 4681
    },
    {
      "epoch": 0.0054,
      "grad_norm": 0.5676658749580383,
      "learning_rate": 1.2271128143913458e-07,
      "loss": 0.3292,
      "step": 4682
    },
    {
      "epoch": 0.0056,
      "grad_norm": 0.5742881894111633,
      "learning_rate": 1.2194387343192504e-07,
      "loss": 0.3211,
      "step": 4683
    },
    {
      "epoch": 0.0058,
      "grad_norm": 0.470712810754776,
      "learning_rate": 1.211788429300126e-07,
      "loss": 0.3382,
      "step": 4684
    },
    {
      "epoch": 0.006,
      "grad_norm": 0.5319179892539978,
      "learning_rate": 1.2041619030626283e-07,
      "loss": 0.3727,
      "step": 4685
    },
    {
      "epoch": 0.0062,
      "grad_norm": 1.1607557535171509,
      "learning_rate": 1.1965591593238513e-07,
      "loss": 0.3248,
      "step": 4686
    },
    {
      "epoch": 0.0064,
      "grad_norm": 0.4753963053226471,
      "learning_rate": 1.1889802017892638e-07,
      "loss": 0.3282,
      "step": 4687
    },
    {
      "epoch": 0.0066,
      "grad_norm": 0.4188324213027954,
      "learning_rate": 1.1814250341527611e-07,
      "loss": 0.355,
      "step": 4688
    },
    {
      "epoch": 0.0068,
      "grad_norm": 0.3864855170249939,
      "learning_rate": 1.1738936600966366e-07,
      "loss": 0.3121,
      "step": 4689
    },
    {
      "epoch": 0.007,
      "grad_norm": 0.38769933581352234,
      "learning_rate": 1.166386083291604e-07,
      "loss": 0.3081,
      "step": 4690
    },
    {
      "epoch": 0.0072,
      "grad_norm": 0.43640953302383423,
      "learning_rate": 1.1589023073967586e-07,
      "loss": 0.3169,
      "step": 4691
    },
    {
      "epoch": 0.0074,
      "grad_norm": 0.7401105761528015,
      "learning_rate": 1.1514423360595939e-07,
      "loss": 0.3177,
      "step": 4692
    },
    {
      "epoch": 0.0076,
      "grad_norm": 0.4346793293952942,
      "learning_rate": 1.1440061729160235e-07,
      "loss": 0.3526,
      "step": 4693
    },
    {
      "epoch": 0.0078,
      "grad_norm": 0.4309486746788025,
      "learning_rate": 1.136593821590326e-07,
      "loss": 0.3505,
      "step": 4694
    },
    {
      "epoch": 0.008,
      "grad_norm": 0.5046737790107727,
      "learning_rate": 1.1292052856952063e-07,
      "loss": 0.3415,
      "step": 4695
    },
    {
      "epoch": 0.0082,
      "grad_norm": 0.4320617914199829,
      "learning_rate": 1.1218405688317447e-07,
      "loss": 0.3352,
      "step": 4696
    },
    {
      "epoch": 0.0084,
      "grad_norm": 0.7351077198982239,
      "learning_rate": 1.1144996745894033e-07,
      "loss": 0.3366,
      "step": 4697
    },
    {
      "epoch": 0.0086,
      "grad_norm": 0.5165927410125732,
      "learning_rate": 1.107182606546059e-07,
      "loss": 0.3402,
      "step": 4698
    },
    {
      "epoch": 0.0088,
      "grad_norm": 0.4888964295387268,
      "learning_rate": 1.0998893682679479e-07,
      "loss": 0.3288,
      "step": 4699
    },
    {
      "epoch": 0.009,
      "grad_norm": 0.5114607214927673,
      "learning_rate": 1.0926199633097156e-07,
      "loss": 0.3397,
      "step": 4700
    },
    {
      "epoch": 0.0092,
      "grad_norm": 0.5630620718002319,
      "learning_rate": 1.0853743952143836e-07,
      "loss": 0.3387,
      "step": 4701
    },
    {
      "epoch": 0.0094,
      "grad_norm": 0.43510088324546814,
      "learning_rate": 1.0781526675133492e-07,
      "loss": 0.3183,
      "step": 4702
    },
    {
      "epoch": 0.0096,
      "grad_norm": 0.3584558963775635,
      "learning_rate": 1.0709547837263967e-07,
      "loss": 0.2909,
      "step": 4703
    },
    {
      "epoch": 0.0098,
      "grad_norm": 0.6064521074295044,
      "learning_rate": 1.0637807473616812e-07,
      "loss": 0.3562,
      "step": 4704
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.35960155725479126,
      "learning_rate": 1.0566305619157502e-07,
      "loss": 0.3327,
      "step": 4705
    },
    {
      "epoch": 0.0102,
      "grad_norm": 0.40141189098358154,
      "learning_rate": 1.0495042308735104e-07,
      "loss": 0.3079,
      "step": 4706
    },
    {
      "epoch": 0.0104,
      "grad_norm": 0.49856722354888916,
      "learning_rate": 1.0424017577082556e-07,
      "loss": 0.3136,
      "step": 4707
    },
    {
      "epoch": 0.0106,
      "grad_norm": 0.7121588587760925,
      "learning_rate": 1.0353231458816338e-07,
      "loss": 0.3441,
      "step": 4708
    },
    {
      "epoch": 0.0108,
      "grad_norm": 0.45959147810935974,
      "learning_rate": 1.0282683988436792e-07,
      "loss": 0.3314,
      "step": 4709
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.5085610151290894,
      "learning_rate": 1.0212375200327973e-07,
      "loss": 0.3385,
      "step": 4710
    },
    {
      "epoch": 0.0112,
      "grad_norm": 0.43585294485092163,
      "learning_rate": 1.0142305128757468e-07,
      "loss": 0.3248,
      "step": 4711
    },
    {
      "epoch": 0.0114,
      "grad_norm": 0.4851681888103485,
      "learning_rate": 1.007247380787657e-07,
      "loss": 0.3424,
      "step": 4712
    },
    {
      "epoch": 0.0116,
      "grad_norm": 0.49992984533309937,
      "learning_rate": 1.0002881271720222e-07,
      "loss": 0.3302,
      "step": 4713
    },
    {
      "epoch": 0.0118,
      "grad_norm": 0.4991665184497833,
      "learning_rate": 9.933527554207012e-08,
      "loss": 0.3188,
      "step": 4714
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.501101016998291,
      "learning_rate": 9.864412689139124e-08,
      "loss": 0.3192,
      "step": 4715
    },
    {
      "epoch": 0.0122,
      "grad_norm": 0.41049155592918396,
      "learning_rate": 9.795536710202169e-08,
      "loss": 0.3057,
      "step": 4716
    },
    {
      "epoch": 0.0124,
      "grad_norm": 0.5212928056716919,
      "learning_rate": 9.726899650965626e-08,
      "loss": 0.3356,
      "step": 4717
    },
    {
      "epoch": 0.0126,
      "grad_norm": 0.4828174114227295,
      "learning_rate": 9.658501544882182e-08,
      "loss": 0.3657,
      "step": 4718
    },
    {
      "epoch": 0.0128,
      "grad_norm": 0.4165886342525482,
      "learning_rate": 9.590342425288446e-08,
      "loss": 0.2663,
      "step": 4719
    },
    {
      "epoch": 0.013,
      "grad_norm": 0.5178773403167725,
      "learning_rate": 9.522422325404234e-08,
      "loss": 0.3642,
      "step": 4720
    },
    {
      "epoch": 0.0132,
      "grad_norm": 0.38192176818847656,
      "learning_rate": 9.454741278333013e-08,
      "loss": 0.3444,
      "step": 4721
    },
    {
      "epoch": 0.0134,
      "grad_norm": 0.4194165766239166,
      "learning_rate": 9.387299317061615e-08,
      "loss": 0.3088,
      "step": 4722
    },
    {
      "epoch": 0.0136,
      "grad_norm": 0.4740980267524719,
      "learning_rate": 9.320096474460527e-08,
      "loss": 0.3578,
      "step": 4723
    },
    {
      "epoch": 0.0138,
      "grad_norm": 0.44922876358032227,
      "learning_rate": 9.253132783283548e-08,
      "loss": 0.3481,
      "step": 4724
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.4827326834201813,
      "learning_rate": 9.186408276168012e-08,
      "loss": 0.3297,
      "step": 4725
    },
    {
      "epoch": 0.0142,
      "grad_norm": 0.4814259707927704,
      "learning_rate": 9.119922985634633e-08,
      "loss": 0.2991,
      "step": 4726
    },
    {
      "epoch": 0.0144,
      "grad_norm": 0.5083125233650208,
      "learning_rate": 9.053676944087542e-08,
      "loss": 0.3234,
      "step": 4727
    },
    {
      "epoch": 0.0146,
      "grad_norm": 0.6291019320487976,
      "learning_rate": 8.987670183814134e-08,
      "loss": 0.3397,
      "step": 4728
    },
    {
      "epoch": 0.0148,
      "grad_norm": 0.3973853290081024,
      "learning_rate": 8.921902736985399e-08,
      "loss": 0.3469,
      "step": 4729
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.47562175989151,
      "learning_rate": 8.856374635655696e-08,
      "loss": 0.3463,
      "step": 4730
    },
    {
      "epoch": 0.0152,
      "grad_norm": 0.709004819393158,
      "learning_rate": 8.791085911762476e-08,
      "loss": 0.3321,
      "step": 4731
    },
    {
      "epoch": 0.0154,
      "grad_norm": 0.872928261756897,
      "learning_rate": 8.726036597126619e-08,
      "loss": 0.3265,
      "step": 4732
    },
    {
      "epoch": 0.0156,
      "grad_norm": 0.6977161765098572,
      "learning_rate": 8.661226723452542e-08,
      "loss": 0.3152,
      "step": 4733
    },
    {
      "epoch": 0.0158,
      "grad_norm": 0.47681283950805664,
      "learning_rate": 8.596656322327645e-08,
      "loss": 0.3489,
      "step": 4734
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.7906132340431213,
      "learning_rate": 8.53232542522292e-08,
      "loss": 0.3374,
      "step": 4735
    },
    {
      "epoch": 0.0162,
      "grad_norm": 0.3898230791091919,
      "learning_rate": 8.468234063492287e-08,
      "loss": 0.3263,
      "step": 4736
    },
    {
      "epoch": 0.0164,
      "grad_norm": 0.603818953037262,
      "learning_rate": 8.404382268373145e-08,
      "loss": 0.3107,
      "step": 4737
    },
    {
      "epoch": 0.0166,
      "grad_norm": 0.5070759654045105,
      "learning_rate": 8.340770070986215e-08,
      "loss": 0.3527,
      "step": 4738
    },
    {
      "epoch": 0.0168,
      "grad_norm": 0.4831944406032562,
      "learning_rate": 8.277397502335194e-08,
      "loss": 0.3428,
      "step": 4739
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.4045814275741577,
      "learning_rate": 8.214264593307097e-08,
      "loss": 0.3076,
      "step": 4740
    },
    {
      "epoch": 0.0172,
      "grad_norm": 0.4477141499519348,
      "learning_rate": 8.151371374672146e-08,
      "loss": 0.2923,
      "step": 4741
    },
    {
      "epoch": 0.0174,
      "grad_norm": 0.4486124515533447,
      "learning_rate": 8.088717877083706e-08,
      "loss": 0.3551,
      "step": 4742
    },
    {
      "epoch": 0.0176,
      "grad_norm": 0.49428772926330566,
      "learning_rate": 8.02630413107841e-08,
      "loss": 0.3229,
      "step": 4743
    },
    {
      "epoch": 0.0178,
      "grad_norm": 0.37898653745651245,
      "learning_rate": 7.964130167075923e-08,
      "loss": 0.338,
      "step": 4744
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.502548098564148,
      "learning_rate": 7.90219601537906e-08,
      "loss": 0.3315,
      "step": 4745
    },
    {
      "epoch": 0.0182,
      "grad_norm": 1.1786963939666748,
      "learning_rate": 7.840501706173786e-08,
      "loss": 0.3493,
      "step": 4746
    },
    {
      "epoch": 0.0184,
      "grad_norm": 0.4260566830635071,
      "learning_rate": 7.779047269529105e-08,
      "loss": 0.3063,
      "step": 4747
    },
    {
      "epoch": 0.0186,
      "grad_norm": 0.45523497462272644,
      "learning_rate": 7.717832735397335e-08,
      "loss": 0.3405,
      "step": 4748
    },
    {
      "epoch": 0.0188,
      "grad_norm": 0.34608176350593567,
      "learning_rate": 7.656858133613498e-08,
      "loss": 0.3129,
      "step": 4749
    },
    {
      "epoch": 0.019,
      "grad_norm": 0.8952856063842773,
      "learning_rate": 7.59612349389599e-08,
      "loss": 0.3119,
      "step": 4750
    },
    {
      "epoch": 0.0192,
      "grad_norm": 0.48815256357192993,
      "learning_rate": 7.535628845846077e-08,
      "loss": 0.3241,
      "step": 4751
    },
    {
      "epoch": 0.0194,
      "grad_norm": 0.38107848167419434,
      "learning_rate": 7.475374218948118e-08,
      "loss": 0.3077,
      "step": 4752
    },
    {
      "epoch": 0.0196,
      "grad_norm": 0.49796783924102783,
      "learning_rate": 7.415359642569564e-08,
      "loss": 0.3043,
      "step": 4753
    },
    {
      "epoch": 0.0198,
      "grad_norm": 0.5285736918449402,
      "learning_rate": 7.355585145960743e-08,
      "loss": 0.3373,
      "step": 4754
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4435095191001892,
      "learning_rate": 7.296050758254958e-08,
      "loss": 0.3698,
      "step": 4755
    },
    {
      "epoch": 0.0202,
      "grad_norm": 0.7090453505516052,
      "learning_rate": 7.236756508468612e-08,
      "loss": 0.3337,
      "step": 4756
    },
    {
      "epoch": 0.0204,
      "grad_norm": 0.473169207572937,
      "learning_rate": 7.177702425500977e-08,
      "loss": 0.3202,
      "step": 4757
    },
    {
      "epoch": 0.0206,
      "grad_norm": 0.4381136894226074,
      "learning_rate": 7.118888538134361e-08,
      "loss": 0.3246,
      "step": 4758
    },
    {
      "epoch": 0.0208,
      "grad_norm": 0.4808897376060486,
      "learning_rate": 7.060314875033836e-08,
      "loss": 0.3211,
      "step": 4759
    },
    {
      "epoch": 0.021,
      "grad_norm": 0.6551614999771118,
      "learning_rate": 7.001981464747565e-08,
      "loss": 0.3708,
      "step": 4760
    },
    {
      "epoch": 0.0212,
      "grad_norm": 0.3812798261642456,
      "learning_rate": 6.943888335706472e-08,
      "loss": 0.3143,
      "step": 4761
    },
    {
      "epoch": 0.0214,
      "grad_norm": 0.7698546051979065,
      "learning_rate": 6.88603551622452e-08,
      "loss": 0.3326,
      "step": 4762
    },
    {
      "epoch": 0.0216,
      "grad_norm": 0.4213687479496002,
      "learning_rate": 6.828423034498488e-08,
      "loss": 0.3208,
      "step": 4763
    },
    {
      "epoch": 0.0218,
      "grad_norm": 0.3736777901649475,
      "learning_rate": 6.771050918607913e-08,
      "loss": 0.3251,
      "step": 4764
    },
    {
      "epoch": 0.022,
      "grad_norm": 1.31650710105896,
      "learning_rate": 6.713919196515317e-08,
      "loss": 0.3217,
      "step": 4765
    },
    {
      "epoch": 0.0222,
      "grad_norm": 0.48407837748527527,
      "learning_rate": 6.657027896065982e-08,
      "loss": 0.3438,
      "step": 4766
    },
    {
      "epoch": 0.0224,
      "grad_norm": 0.4884192645549774,
      "learning_rate": 6.60037704498806e-08,
      "loss": 0.3512,
      "step": 4767
    },
    {
      "epoch": 0.0226,
      "grad_norm": 0.4053954482078552,
      "learning_rate": 6.543966670892465e-08,
      "loss": 0.3098,
      "step": 4768
    },
    {
      "epoch": 0.0228,
      "grad_norm": 0.4046313464641571,
      "learning_rate": 6.487796801272983e-08,
      "loss": 0.3248,
      "step": 4769
    },
    {
      "epoch": 0.023,
      "grad_norm": 0.3923828899860382,
      "learning_rate": 6.431867463506047e-08,
      "loss": 0.3204,
      "step": 4770
    },
    {
      "epoch": 0.0232,
      "grad_norm": 0.4397640526294708,
      "learning_rate": 6.376178684850965e-08,
      "loss": 0.3555,
      "step": 4771
    },
    {
      "epoch": 0.0234,
      "grad_norm": 0.43832406401634216,
      "learning_rate": 6.3207304924498e-08,
      "loss": 0.3072,
      "step": 4772
    },
    {
      "epoch": 0.0236,
      "grad_norm": 0.4474277198314667,
      "learning_rate": 6.265522913327326e-08,
      "loss": 0.336,
      "step": 4773
    },
    {
      "epoch": 0.0238,
      "grad_norm": 0.4629674553871155,
      "learning_rate": 6.210555974391075e-08,
      "loss": 0.3418,
      "step": 4774
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.4827856421470642,
      "learning_rate": 6.15582970243117e-08,
      "loss": 0.343,
      "step": 4775
    },
    {
      "epoch": 0.0242,
      "grad_norm": 0.4225843548774719,
      "learning_rate": 6.101344124120557e-08,
      "loss": 0.3392,
      "step": 4776
    },
    {
      "epoch": 0.0244,
      "grad_norm": 0.42378711700439453,
      "learning_rate": 6.047099266014877e-08,
      "loss": 0.3413,
      "step": 4777
    },
    {
      "epoch": 0.0246,
      "grad_norm": 0.5447564721107483,
      "learning_rate": 5.993095154552431e-08,
      "loss": 0.3292,
      "step": 4778
    },
    {
      "epoch": 0.0248,
      "grad_norm": 0.5794199109077454,
      "learning_rate": 5.939331816054161e-08,
      "loss": 0.3708,
      "step": 4779
    },
    {
      "epoch": 0.025,
      "grad_norm": 0.42652592062950134,
      "learning_rate": 5.8858092767236084e-08,
      "loss": 0.3266,
      "step": 4780
    },
    {
      "epoch": 0.0252,
      "grad_norm": 0.4497049152851105,
      "learning_rate": 5.8325275626470166e-08,
      "loss": 0.3588,
      "step": 4781
    },
    {
      "epoch": 0.0254,
      "grad_norm": 0.4479667544364929,
      "learning_rate": 5.7794866997933355e-08,
      "loss": 0.3494,
      "step": 4782
    },
    {
      "epoch": 0.0256,
      "grad_norm": 0.5386527180671692,
      "learning_rate": 5.726686714013996e-08,
      "loss": 0.3439,
      "step": 4783
    },
    {
      "epoch": 0.0258,
      "grad_norm": 0.5381243228912354,
      "learning_rate": 5.674127631043025e-08,
      "loss": 0.3552,
      "step": 4784
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.46928414702415466,
      "learning_rate": 5.621809476497098e-08,
      "loss": 0.3529,
      "step": 4785
    },
    {
      "epoch": 0.0262,
      "grad_norm": 0.5347269177436829,
      "learning_rate": 5.569732275875428e-08,
      "loss": 0.34,
      "step": 4786
    },
    {
      "epoch": 0.0264,
      "grad_norm": 0.48210784792900085,
      "learning_rate": 5.517896054559879e-08,
      "loss": 0.3247,
      "step": 4787
    },
    {
      "epoch": 0.0266,
      "grad_norm": 0.45099377632141113,
      "learning_rate": 5.466300837814797e-08,
      "loss": 0.3281,
      "step": 4788
    },
    {
      "epoch": 0.0268,
      "grad_norm": 0.4242096245288849,
      "learning_rate": 5.414946650786957e-08,
      "loss": 0.2895,
      "step": 4789
    },
    {
      "epoch": 0.027,
      "grad_norm": 0.45455503463745117,
      "learning_rate": 5.363833518505834e-08,
      "loss": 0.3547,
      "step": 4790
    },
    {
      "epoch": 0.0272,
      "grad_norm": 0.5508436560630798,
      "learning_rate": 5.312961465883393e-08,
      "loss": 0.3265,
      "step": 4791
    },
    {
      "epoch": 0.0274,
      "grad_norm": 0.6061508059501648,
      "learning_rate": 5.262330517713965e-08,
      "loss": 0.291,
      "step": 4792
    },
    {
      "epoch": 0.0276,
      "grad_norm": 0.4578220844268799,
      "learning_rate": 5.2119406986745336e-08,
      "loss": 0.3228,
      "step": 4793
    },
    {
      "epoch": 0.0278,
      "grad_norm": 0.41361913084983826,
      "learning_rate": 5.161792033324398e-08,
      "loss": 0.3095,
      "step": 4794
    },
    {
      "epoch": 0.028,
      "grad_norm": 0.5321029424667358,
      "learning_rate": 5.111884546105506e-08,
      "loss": 0.3373,
      "step": 4795
    },
    {
      "epoch": 0.0282,
      "grad_norm": 0.4467407763004303,
      "learning_rate": 5.062218261342122e-08,
      "loss": 0.3188,
      "step": 4796
    },
    {
      "epoch": 0.0284,
      "grad_norm": 0.5788113474845886,
      "learning_rate": 5.012793203240995e-08,
      "loss": 0.3635,
      "step": 4797
    },
    {
      "epoch": 0.0286,
      "grad_norm": 0.41333022713661194,
      "learning_rate": 4.9636093958913e-08,
      "loss": 0.3335,
      "step": 4798
    },
    {
      "epoch": 0.0288,
      "grad_norm": 0.38449081778526306,
      "learning_rate": 4.914666863264528e-08,
      "loss": 0.3497,
      "step": 4799
    },
    {
      "epoch": 0.029,
      "grad_norm": 0.6596280932426453,
      "learning_rate": 4.865965629214819e-08,
      "loss": 0.3649,
      "step": 4800
    },
    {
      "epoch": 0.0292,
      "grad_norm": 0.5550872087478638,
      "learning_rate": 4.8175057174785766e-08,
      "loss": 0.363,
      "step": 4801
    },
    {
      "epoch": 0.0294,
      "grad_norm": 0.4786381125450134,
      "learning_rate": 4.769287151674407e-08,
      "loss": 0.3531,
      "step": 4802
    },
    {
      "epoch": 0.0296,
      "grad_norm": 0.397653192281723,
      "learning_rate": 4.7213099553035655e-08,
      "loss": 0.3038,
      "step": 4803
    },
    {
      "epoch": 0.0298,
      "grad_norm": 0.41187193989753723,
      "learning_rate": 4.6735741517495715e-08,
      "loss": 0.3376,
      "step": 4804
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.4934803247451782,
      "learning_rate": 4.626079764278202e-08,
      "loss": 0.3397,
      "step": 4805
    },
    {
      "epoch": 0.0302,
      "grad_norm": 0.4678121507167816,
      "learning_rate": 4.578826816037718e-08,
      "loss": 0.3146,
      "step": 4806
    },
    {
      "epoch": 0.0304,
      "grad_norm": 0.4545961618423462,
      "learning_rate": 4.531815330058586e-08,
      "loss": 0.3256,
      "step": 4807
    },
    {
      "epoch": 0.0306,
      "grad_norm": 0.5300041437149048,
      "learning_rate": 4.485045329253646e-08,
      "loss": 0.3752,
      "step": 4808
    },
    {
      "epoch": 0.0308,
      "grad_norm": 0.5888460278511047,
      "learning_rate": 4.438516836417994e-08,
      "loss": 0.3627,
      "step": 4809
    },
    {
      "epoch": 0.031,
      "grad_norm": 0.4599490463733673,
      "learning_rate": 4.392229874229159e-08,
      "loss": 0.3392,
      "step": 4810
    },
    {
      "epoch": 0.0312,
      "grad_norm": 0.4561294615268707,
      "learning_rate": 4.346184465246761e-08,
      "loss": 0.3325,
      "step": 4811
    },
    {
      "epoch": 0.0314,
      "grad_norm": 0.813010036945343,
      "learning_rate": 4.3003806319127376e-08,
      "loss": 0.3271,
      "step": 4812
    },
    {
      "epoch": 0.0316,
      "grad_norm": 0.4112866520881653,
      "learning_rate": 4.2548183965513415e-08,
      "loss": 0.3258,
      "step": 4813
    },
    {
      "epoch": 0.0318,
      "grad_norm": 0.4928356409072876,
      "learning_rate": 4.209497781369143e-08,
      "loss": 0.2837,
      "step": 4814
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.49096083641052246,
      "learning_rate": 4.164418808454806e-08,
      "loss": 0.3141,
      "step": 4815
    },
    {
      "epoch": 0.0322,
      "grad_norm": 0.4996338188648224,
      "learning_rate": 4.1195814997792014e-08,
      "loss": 0.3256,
      "step": 4816
    },
    {
      "epoch": 0.0324,
      "grad_norm": 0.7773621082305908,
      "learning_rate": 4.0749858771956253e-08,
      "loss": 0.3008,
      "step": 4817
    },
    {
      "epoch": 0.0326,
      "grad_norm": 0.43193545937538147,
      "learning_rate": 4.030631962439302e-08,
      "loss": 0.3657,
      "step": 4818
    },
    {
      "epoch": 0.0328,
      "grad_norm": 0.4600306451320648,
      "learning_rate": 3.986519777127884e-08,
      "loss": 0.3286,
      "step": 4819
    },
    {
      "epoch": 0.033,
      "grad_norm": 0.4610413908958435,
      "learning_rate": 3.9426493427611177e-08,
      "loss": 0.3628,
      "step": 4820
    },
    {
      "epoch": 0.0332,
      "grad_norm": 0.6046647429466248,
      "learning_rate": 3.899020680720844e-08,
      "loss": 0.3145,
      "step": 4821
    },
    {
      "epoch": 0.0334,
      "grad_norm": 0.39264294505119324,
      "learning_rate": 3.855633812271165e-08,
      "loss": 0.3033,
      "step": 4822
    },
    {
      "epoch": 0.0336,
      "grad_norm": 0.5032731294631958,
      "learning_rate": 3.812488758558386e-08,
      "loss": 0.2957,
      "step": 4823
    },
    {
      "epoch": 0.0338,
      "grad_norm": 0.4785033166408539,
      "learning_rate": 3.769585540610799e-08,
      "loss": 0.3206,
      "step": 4824
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.4093025028705597,
      "learning_rate": 3.726924179339009e-08,
      "loss": 0.3395,
      "step": 4825
    },
    {
      "epoch": 0.0342,
      "grad_norm": 0.38270607590675354,
      "learning_rate": 3.684504695535496e-08,
      "loss": 0.3315,
      "step": 4826
    },
    {
      "epoch": 0.0344,
      "grad_norm": 0.438531756401062,
      "learning_rate": 3.642327109875166e-08,
      "loss": 0.3077,
      "step": 4827
    },
    {
      "epoch": 0.0346,
      "grad_norm": 0.45348745584487915,
      "learning_rate": 3.600391442914741e-08,
      "loss": 0.3387,
      "step": 4828
    },
    {
      "epoch": 0.0348,
      "grad_norm": 0.5960204005241394,
      "learning_rate": 3.558697715093207e-08,
      "loss": 0.3182,
      "step": 4829
    },
    {
      "epoch": 0.035,
      "grad_norm": 0.5552530288696289,
      "learning_rate": 3.517245946731529e-08,
      "loss": 0.3505,
      "step": 4830
    },
    {
      "epoch": 0.0352,
      "grad_norm": 0.4574570953845978,
      "learning_rate": 3.47603615803288e-08,
      "loss": 0.3082,
      "step": 4831
    },
    {
      "epoch": 0.0354,
      "grad_norm": 0.4544902443885803,
      "learning_rate": 3.435068369082306e-08,
      "loss": 0.3084,
      "step": 4832
    },
    {
      "epoch": 0.0356,
      "grad_norm": 0.43576598167419434,
      "learning_rate": 3.394342599847111e-08,
      "loss": 0.3305,
      "step": 4833
    },
    {
      "epoch": 0.0358,
      "grad_norm": 0.5625936388969421,
      "learning_rate": 3.3538588701765296e-08,
      "loss": 0.3757,
      "step": 4834
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.4007652997970581,
      "learning_rate": 3.313617199801777e-08,
      "loss": 0.3444,
      "step": 4835
    },
    {
      "epoch": 0.0362,
      "grad_norm": 0.4061969816684723,
      "learning_rate": 3.2736176083362216e-08,
      "loss": 0.3132,
      "step": 4836
    },
    {
      "epoch": 0.0364,
      "grad_norm": 0.4213486313819885,
      "learning_rate": 3.2338601152751e-08,
      "loss": 0.3509,
      "step": 4837
    },
    {
      "epoch": 0.0366,
      "grad_norm": 0.3792661428451538,
      "learning_rate": 3.194344739995803e-08,
      "loss": 0.2946,
      "step": 4838
    },
    {
      "epoch": 0.0368,
      "grad_norm": 0.3948346972465515,
      "learning_rate": 3.1550715017575895e-08,
      "loss": 0.3219,
      "step": 4839
    },
    {
      "epoch": 0.037,
      "grad_norm": 0.465410441160202,
      "learning_rate": 3.1160404197018155e-08,
      "loss": 0.3735,
      "step": 4840
    },
    {
      "epoch": 0.0372,
      "grad_norm": 0.4858006536960602,
      "learning_rate": 3.077251512851709e-08,
      "loss": 0.3492,
      "step": 4841
    },
    {
      "epoch": 0.0374,
      "grad_norm": 0.4396311342716217,
      "learning_rate": 3.038704800112535e-08,
      "loss": 0.3029,
      "step": 4842
    },
    {
      "epoch": 0.0376,
      "grad_norm": 0.4730176329612732,
      "learning_rate": 3.0004003002714886e-08,
      "loss": 0.349,
      "step": 4843
    },
    {
      "epoch": 0.0378,
      "grad_norm": 0.5650606751441956,
      "learning_rate": 2.9623380319976912e-08,
      "loss": 0.3056,
      "step": 4844
    },
    {
      "epoch": 0.038,
      "grad_norm": 4.096367359161377,
      "learning_rate": 2.9245180138423033e-08,
      "loss": 0.3193,
      "step": 4845
    },
    {
      "epoch": 0.0382,
      "grad_norm": 0.5273382663726807,
      "learning_rate": 2.8869402642382473e-08,
      "loss": 0.3389,
      "step": 4846
    },
    {
      "epoch": 0.0384,
      "grad_norm": 2.6522650718688965,
      "learning_rate": 2.8496048015005385e-08,
      "loss": 0.321,
      "step": 4847
    },
    {
      "epoch": 0.0386,
      "grad_norm": 0.36972877383232117,
      "learning_rate": 2.8125116438260104e-08,
      "loss": 0.318,
      "step": 4848
    },
    {
      "epoch": 0.0388,
      "grad_norm": 0.5419030785560608,
      "learning_rate": 2.7756608092933678e-08,
      "loss": 0.3518,
      "step": 4849
    },
    {
      "epoch": 0.039,
      "grad_norm": 0.462184876203537,
      "learning_rate": 2.7390523158633552e-08,
      "loss": 0.3936,
      "step": 4850
    },
    {
      "epoch": 0.0392,
      "grad_norm": 0.5205352902412415,
      "learning_rate": 2.7026861813783668e-08,
      "loss": 0.3176,
      "step": 4851
    },
    {
      "epoch": 0.0394,
      "grad_norm": 0.5487911701202393,
      "learning_rate": 2.6665624235629463e-08,
      "loss": 0.3077,
      "step": 4852
    },
    {
      "epoch": 0.0396,
      "grad_norm": 0.5310013294219971,
      "learning_rate": 2.6306810600233435e-08,
      "loss": 0.3174,
      "step": 4853
    },
    {
      "epoch": 0.0398,
      "grad_norm": 4.407668590545654,
      "learning_rate": 2.5950421082476805e-08,
      "loss": 0.2979,
      "step": 4854
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5318701267242432,
      "learning_rate": 2.5596455856058966e-08,
      "loss": 0.3196,
      "step": 4855
    },
    {
      "epoch": 0.0402,
      "grad_norm": 0.4391268491744995,
      "learning_rate": 2.5244915093499134e-08,
      "loss": 0.3165,
      "step": 4856
    },
    {
      "epoch": 0.0404,
      "grad_norm": 0.4107357859611511,
      "learning_rate": 2.489579896613359e-08,
      "loss": 0.3446,
      "step": 4857
    },
    {
      "epoch": 0.0406,
      "grad_norm": 0.38798218965530396,
      "learning_rate": 2.4549107644117888e-08,
      "loss": 0.3321,
      "step": 4858
    },
    {
      "epoch": 0.0408,
      "grad_norm": 0.39042478799819946,
      "learning_rate": 2.4204841296424086e-08,
      "loss": 0.334,
      "step": 4859
    },
    {
      "epoch": 0.041,
      "grad_norm": 0.42818117141723633,
      "learning_rate": 2.386300009084408e-08,
      "loss": 0.3213,
      "step": 4860
    },
    {
      "epoch": 0.0412,
      "grad_norm": 0.4592888057231903,
      "learning_rate": 2.3523584193986816e-08,
      "loss": 0.3369,
      "step": 4861
    },
    {
      "epoch": 0.0414,
      "grad_norm": 0.4295707941055298,
      "learning_rate": 2.3186593771280518e-08,
      "loss": 0.3448,
      "step": 4862
    },
    {
      "epoch": 0.0416,
      "grad_norm": 0.3860400915145874,
      "learning_rate": 2.285202898696881e-08,
      "loss": 0.311,
      "step": 4863
    },
    {
      "epoch": 0.0418,
      "grad_norm": 0.45240551233291626,
      "learning_rate": 2.251989000411514e-08,
      "loss": 0.3164,
      "step": 4864
    },
    {
      "epoch": 0.042,
      "grad_norm": 1.3583006858825684,
      "learning_rate": 2.219017698460002e-08,
      "loss": 0.341,
      "step": 4865
    },
    {
      "epoch": 0.0422,
      "grad_norm": 0.33537980914115906,
      "learning_rate": 2.1862890089121567e-08,
      "loss": 0.2863,
      "step": 4866
    },
    {
      "epoch": 0.0424,
      "grad_norm": 0.3939952552318573,
      "learning_rate": 2.1538029477195522e-08,
      "loss": 0.2995,
      "step": 4867
    },
    {
      "epoch": 0.0426,
      "grad_norm": 0.41878247261047363,
      "learning_rate": 2.1215595307154667e-08,
      "loss": 0.3239,
      "step": 4868
    },
    {
      "epoch": 0.0428,
      "grad_norm": 0.4346262514591217,
      "learning_rate": 2.0895587736149414e-08,
      "loss": 0.3354,
      "step": 4869
    },
    {
      "epoch": 0.043,
      "grad_norm": 0.5262599587440491,
      "learning_rate": 2.057800692014833e-08,
      "loss": 0.3339,
      "step": 4870
    },
    {
      "epoch": 0.0432,
      "grad_norm": 0.4208918511867523,
      "learning_rate": 2.026285301393538e-08,
      "loss": 0.3398,
      "step": 4871
    },
    {
      "epoch": 0.0434,
      "grad_norm": 0.45649099349975586,
      "learning_rate": 1.995012617111436e-08,
      "loss": 0.3511,
      "step": 4872
    },
    {
      "epoch": 0.0436,
      "grad_norm": 0.38207921385765076,
      "learning_rate": 1.963982654410279e-08,
      "loss": 0.3094,
      "step": 4873
    },
    {
      "epoch": 0.0438,
      "grad_norm": 0.4806406497955322,
      "learning_rate": 1.9331954284137476e-08,
      "loss": 0.3342,
      "step": 4874
    },
    {
      "epoch": 0.044,
      "grad_norm": 0.5048065781593323,
      "learning_rate": 1.9026509541272276e-08,
      "loss": 0.3549,
      "step": 4875
    },
    {
      "epoch": 0.0442,
      "grad_norm": 0.7354903221130371,
      "learning_rate": 1.8723492464376992e-08,
      "loss": 0.3203,
      "step": 4876
    },
    {
      "epoch": 0.0444,
      "grad_norm": 0.4296565353870392,
      "learning_rate": 1.842290320113793e-08,
      "loss": 0.3553,
      "step": 4877
    },
    {
      "epoch": 0.0446,
      "grad_norm": 0.5063086152076721,
      "learning_rate": 1.8124741898058462e-08,
      "loss": 0.3236,
      "step": 4878
    },
    {
      "epoch": 0.0448,
      "grad_norm": 0.6973747611045837,
      "learning_rate": 1.7829008700460116e-08,
      "loss": 0.3554,
      "step": 4879
    },
    {
      "epoch": 0.045,
      "grad_norm": 0.5638706684112549,
      "learning_rate": 1.753570375247815e-08,
      "loss": 0.3261,
      "step": 4880
    },
    {
      "epoch": 0.0452,
      "grad_norm": 0.5037717819213867,
      "learning_rate": 1.7244827197067103e-08,
      "loss": 0.3188,
      "step": 4881
    },
    {
      "epoch": 0.0454,
      "grad_norm": 0.4227774441242218,
      "learning_rate": 1.6956379175995796e-08,
      "loss": 0.3349,
      "step": 4882
    },
    {
      "epoch": 0.0456,
      "grad_norm": 0.3795003592967987,
      "learning_rate": 1.6670359829850657e-08,
      "loss": 0.3219,
      "step": 4883
    },
    {
      "epoch": 0.0458,
      "grad_norm": 0.47013336420059204,
      "learning_rate": 1.6386769298034067e-08,
      "loss": 0.3242,
      "step": 4884
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.5066186189651489,
      "learning_rate": 1.610560771876435e-08,
      "loss": 0.3492,
      "step": 4885
    },
    {
      "epoch": 0.0462,
      "grad_norm": 0.5043500065803528,
      "learning_rate": 1.582687522907633e-08,
      "loss": 0.3463,
      "step": 4886
    },
    {
      "epoch": 0.0464,
      "grad_norm": 0.40905869007110596,
      "learning_rate": 1.5550571964820793e-08,
      "loss": 0.3185,
      "step": 4887
    },
    {
      "epoch": 0.0466,
      "grad_norm": 0.4718409478664398,
      "learning_rate": 1.5276698060665007e-08,
      "loss": 0.3506,
      "step": 4888
    },
    {
      "epoch": 0.0468,
      "grad_norm": 0.4787062704563141,
      "learning_rate": 1.500525365009109e-08,
      "loss": 0.336,
      "step": 4889
    },
    {
      "epoch": 0.047,
      "grad_norm": 0.3744618892669678,
      "learning_rate": 1.4736238865398766e-08,
      "loss": 0.3386,
      "step": 4890
    },
    {
      "epoch": 0.0472,
      "grad_norm": 0.522703230381012,
      "learning_rate": 1.4469653837701491e-08,
      "loss": 0.3574,
      "step": 4891
    },
    {
      "epoch": 0.0474,
      "grad_norm": 0.43621647357940674,
      "learning_rate": 1.4205498696930332e-08,
      "loss": 0.3148,
      "step": 4892
    },
    {
      "epoch": 0.0476,
      "grad_norm": 0.40729275345802307,
      "learning_rate": 1.3943773571831188e-08,
      "loss": 0.3355,
      "step": 4893
    },
    {
      "epoch": 0.0478,
      "grad_norm": 0.41116148233413696,
      "learning_rate": 1.3684478589964801e-08,
      "loss": 0.3194,
      "step": 4894
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.46481403708457947,
      "learning_rate": 1.3427613877709523e-08,
      "loss": 0.3068,
      "step": 4895
    },
    {
      "epoch": 0.0482,
      "grad_norm": 0.38592246174812317,
      "learning_rate": 1.3173179560257432e-08,
      "loss": 0.2879,
      "step": 4896
    },
    {
      "epoch": 0.0484,
      "grad_norm": 0.579636812210083,
      "learning_rate": 1.292117576161711e-08,
      "loss": 0.2984,
      "step": 4897
    },
    {
      "epoch": 0.0486,
      "grad_norm": 0.5716261267662048,
      "learning_rate": 1.2671602604612531e-08,
      "loss": 0.3324,
      "step": 4898
    },
    {
      "epoch": 0.0488,
      "grad_norm": 0.5170826315879822,
      "learning_rate": 1.2424460210881394e-08,
      "loss": 0.3189,
      "step": 4899
    },
    {
      "epoch": 0.049,
      "grad_norm": 0.4849421977996826,
      "learning_rate": 1.2179748700879013e-08,
      "loss": 0.3352,
      "step": 4900
    },
    {
      "epoch": 0.0492,
      "grad_norm": 0.49983006715774536,
      "learning_rate": 1.1937468193873869e-08,
      "loss": 0.3465,
      "step": 4901
    },
    {
      "epoch": 0.0494,
      "grad_norm": 0.47212904691696167,
      "learning_rate": 1.1697618807951504e-08,
      "loss": 0.3636,
      "step": 4902
    },
    {
      "epoch": 0.0496,
      "grad_norm": 0.4085179567337036,
      "learning_rate": 1.146020066001119e-08,
      "loss": 0.3519,
      "step": 4903
    },
    {
      "epoch": 0.0498,
      "grad_norm": 0.4908648729324341,
      "learning_rate": 1.1225213865767026e-08,
      "loss": 0.3099,
      "step": 4904
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.42038729786872864,
      "learning_rate": 1.0992658539750179e-08,
      "loss": 0.3233,
      "step": 4905
    },
    {
      "epoch": 0.0502,
      "grad_norm": 0.487883061170578,
      "learning_rate": 1.076253479530387e-08,
      "loss": 0.3512,
      "step": 4906
    },
    {
      "epoch": 0.0504,
      "grad_norm": 0.35590824484825134,
      "learning_rate": 1.0534842744588381e-08,
      "loss": 0.3019,
      "step": 4907
    },
    {
      "epoch": 0.0506,
      "grad_norm": 0.4327605366706848,
      "learning_rate": 1.030958249857772e-08,
      "loss": 0.3227,
      "step": 4908
    },
    {
      "epoch": 0.0508,
      "grad_norm": 0.4491024911403656,
      "learning_rate": 1.008675416706073e-08,
      "loss": 0.3253,
      "step": 4909
    },
    {
      "epoch": 0.051,
      "grad_norm": 0.5044535994529724,
      "learning_rate": 9.866357858642206e-09,
      "loss": 0.3486,
      "step": 4910
    },
    {
      "epoch": 0.0512,
      "grad_norm": 0.7816162705421448,
      "learning_rate": 9.64839368074011e-09,
      "loss": 0.32,
      "step": 4911
    },
    {
      "epoch": 0.0514,
      "grad_norm": 0.45533493161201477,
      "learning_rate": 9.432861739586685e-09,
      "loss": 0.3181,
      "step": 4912
    },
    {
      "epoch": 0.0516,
      "grad_norm": 0.46937549114227295,
      "learning_rate": 9.219762140231237e-09,
      "loss": 0.3715,
      "step": 4913
    },
    {
      "epoch": 0.0518,
      "grad_norm": 0.45291945338249207,
      "learning_rate": 9.009094986534572e-09,
      "loss": 0.3432,
      "step": 4914
    },
    {
      "epoch": 0.052,
      "grad_norm": 0.4417332112789154,
      "learning_rate": 8.800860381173448e-09,
      "loss": 0.3067,
      "step": 4915
    },
    {
      "epoch": 0.0522,
      "grad_norm": 0.9633529782295227,
      "learning_rate": 8.595058425640012e-09,
      "loss": 0.3382,
      "step": 4916
    },
    {
      "epoch": 0.0524,
      "grad_norm": 0.40231022238731384,
      "learning_rate": 8.391689220238474e-09,
      "loss": 0.3294,
      "step": 4917
    },
    {
      "epoch": 0.0526,
      "grad_norm": 0.4014762043952942,
      "learning_rate": 8.190752864088436e-09,
      "loss": 0.3167,
      "step": 4918
    },
    {
      "epoch": 0.0528,
      "grad_norm": 0.6490775346755981,
      "learning_rate": 7.992249455124889e-09,
      "loss": 0.3923,
      "step": 4919
    },
    {
      "epoch": 0.053,
      "grad_norm": 0.5105748176574707,
      "learning_rate": 7.796179090094891e-09,
      "loss": 0.3303,
      "step": 4920
    },
    {
      "epoch": 0.0532,
      "grad_norm": 0.4454926550388336,
      "learning_rate": 7.602541864561442e-09,
      "loss": 0.3237,
      "step": 4921
    },
    {
      "epoch": 0.0534,
      "grad_norm": 0.42467546463012695,
      "learning_rate": 7.411337872900715e-09,
      "loss": 0.3229,
      "step": 4922
    },
    {
      "epoch": 0.0536,
      "grad_norm": 0.4126625955104828,
      "learning_rate": 7.222567208303721e-09,
      "loss": 0.3053,
      "step": 4923
    },
    {
      "epoch": 0.0538,
      "grad_norm": 0.4928229749202728,
      "learning_rate": 7.036229962774088e-09,
      "loss": 0.328,
      "step": 4924
    },
    {
      "epoch": 0.054,
      "grad_norm": 0.46452510356903076,
      "learning_rate": 6.852326227130835e-09,
      "loss": 0.3093,
      "step": 4925
    },
    {
      "epoch": 0.0542,
      "grad_norm": 0.4756675958633423,
      "learning_rate": 6.670856091006151e-09,
      "loss": 0.297,
      "step": 4926
    },
    {
      "epoch": 0.0544,
      "grad_norm": 0.4980093240737915,
      "learning_rate": 6.491819642846509e-09,
      "loss": 0.2987,
      "step": 4927
    },
    {
      "epoch": 0.0546,
      "grad_norm": 0.41867223381996155,
      "learning_rate": 6.315216969912663e-09,
      "loss": 0.3355,
      "step": 4928
    },
    {
      "epoch": 0.0548,
      "grad_norm": 0.4832548201084137,
      "learning_rate": 6.141048158277429e-09,
      "loss": 0.3284,
      "step": 4929
    },
    {
      "epoch": 0.055,
      "grad_norm": 0.4733595550060272,
      "learning_rate": 5.969313292830126e-09,
      "loss": 0.3534,
      "step": 4930
    },
    {
      "epoch": 0.0552,
      "grad_norm": 0.5251695513725281,
      "learning_rate": 5.800012457270466e-09,
      "loss": 0.3297,
      "step": 4931
    },
    {
      "epoch": 0.0554,
      "grad_norm": 0.3926279842853546,
      "learning_rate": 5.633145734114665e-09,
      "loss": 0.337,
      "step": 4932
    },
    {
      "epoch": 0.0556,
      "grad_norm": 0.45747828483581543,
      "learning_rate": 5.468713204692111e-09,
      "loss": 0.363,
      "step": 4933
    },
    {
      "epoch": 0.0558,
      "grad_norm": 0.6105964779853821,
      "learning_rate": 5.306714949143699e-09,
      "loss": 0.3583,
      "step": 4934
    },
    {
      "epoch": 0.056,
      "grad_norm": 0.4674024283885956,
      "learning_rate": 5.147151046426824e-09,
      "loss": 0.367,
      "step": 4935
    },
    {
      "epoch": 0.0562,
      "grad_norm": 0.4790174663066864,
      "learning_rate": 4.990021574309834e-09,
      "loss": 0.336,
      "step": 4936
    },
    {
      "epoch": 0.0564,
      "grad_norm": 0.4785774052143097,
      "learning_rate": 4.835326609376468e-09,
      "loss": 0.3435,
      "step": 4937
    },
    {
      "epoch": 0.0566,
      "grad_norm": 0.4427609443664551,
      "learning_rate": 4.683066227023081e-09,
      "loss": 0.3029,
      "step": 4938
    },
    {
      "epoch": 0.0568,
      "grad_norm": 0.3924143314361572,
      "learning_rate": 4.533240501459202e-09,
      "loss": 0.3355,
      "step": 4939
    },
    {
      "epoch": 0.057,
      "grad_norm": 0.4228701591491699,
      "learning_rate": 4.385849505708084e-09,
      "loss": 0.3229,
      "step": 4940
    },
    {
      "epoch": 0.0572,
      "grad_norm": 0.370103657245636,
      "learning_rate": 4.2408933116072635e-09,
      "loss": 0.2895,
      "step": 4941
    },
    {
      "epoch": 0.0574,
      "grad_norm": 0.5361455082893372,
      "learning_rate": 4.098371989805227e-09,
      "loss": 0.3585,
      "step": 4942
    },
    {
      "epoch": 0.0576,
      "grad_norm": 0.42400437593460083,
      "learning_rate": 3.9582856097658554e-09,
      "loss": 0.3401,
      "step": 4943
    },
    {
      "epoch": 0.0578,
      "grad_norm": 0.4305545687675476,
      "learning_rate": 3.820634239765642e-09,
      "loss": 0.3045,
      "step": 4944
    },
    {
      "epoch": 0.058,
      "grad_norm": 0.5606425404548645,
      "learning_rate": 3.685417946894254e-09,
      "loss": 0.3028,
      "step": 4945
    },
    {
      "epoch": 0.0582,
      "grad_norm": 0.43897804617881775,
      "learning_rate": 3.5526367970539765e-09,
      "loss": 0.3278,
      "step": 4946
    },
    {
      "epoch": 0.0584,
      "grad_norm": 0.42427030205726624,
      "learning_rate": 3.4222908549608193e-09,
      "loss": 0.3166,
      "step": 4947
    },
    {
      "epoch": 0.0586,
      "grad_norm": 0.33626097440719604,
      "learning_rate": 3.294380184143964e-09,
      "loss": 0.33,
      "step": 4948
    },
    {
      "epoch": 0.0588,
      "grad_norm": 0.6974889636039734,
      "learning_rate": 3.1689048469457638e-09,
      "loss": 0.3201,
      "step": 4949
    },
    {
      "epoch": 0.059,
      "grad_norm": 0.44286221265792847,
      "learning_rate": 3.0458649045211897e-09,
      "loss": 0.3209,
      "step": 4950
    },
    {
      "epoch": 0.0592,
      "grad_norm": 0.49861469864845276,
      "learning_rate": 2.9252604168383826e-09,
      "loss": 0.3265,
      "step": 4951
    },
    {
      "epoch": 0.0594,
      "grad_norm": 0.3376009166240692,
      "learning_rate": 2.8070914426786555e-09,
      "loss": 0.3146,
      "step": 4952
    },
    {
      "epoch": 0.0596,
      "grad_norm": 0.45880743861198425,
      "learning_rate": 2.6913580396359384e-09,
      "loss": 0.3492,
      "step": 4953
    },
    {
      "epoch": 0.0598,
      "grad_norm": 0.4424988925457001,
      "learning_rate": 2.5780602641167774e-09,
      "loss": 0.3543,
      "step": 4954
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5060993432998657,
      "learning_rate": 2.4671981713420003e-09,
      "loss": 0.3395,
      "step": 4955
    },
    {
      "epoch": 0.0602,
      "grad_norm": 0.423878937959671,
      "learning_rate": 2.358771815344496e-09,
      "loss": 0.316,
      "step": 4956
    },
    {
      "epoch": 0.0604,
      "grad_norm": 0.49834784865379333,
      "learning_rate": 2.2527812489692156e-09,
      "loss": 0.3559,
      "step": 4957
    },
    {
      "epoch": 0.0606,
      "grad_norm": 0.4854443073272705,
      "learning_rate": 2.149226523874837e-09,
      "loss": 0.3266,
      "step": 4958
    },
    {
      "epoch": 0.0608,
      "grad_norm": 0.8847061395645142,
      "learning_rate": 2.0481076905332074e-09,
      "loss": 0.3522,
      "step": 4959
    },
    {
      "epoch": 0.061,
      "grad_norm": 0.5715512037277222,
      "learning_rate": 1.9494247982282386e-09,
      "loss": 0.3469,
      "step": 4960
    },
    {
      "epoch": 0.0612,
      "grad_norm": 0.4376908838748932,
      "learning_rate": 1.8531778950564572e-09,
      "loss": 0.3355,
      "step": 4961
    },
    {
      "epoch": 0.0614,
      "grad_norm": 0.6655395030975342,
      "learning_rate": 1.759367027927561e-09,
      "loss": 0.3253,
      "step": 4962
    },
    {
      "epoch": 0.0616,
      "grad_norm": 0.5357441902160645,
      "learning_rate": 1.6679922425638651e-09,
      "loss": 0.347,
      "step": 4963
    },
    {
      "epoch": 0.0618,
      "grad_norm": 0.8626128435134888,
      "learning_rate": 1.5790535835003006e-09,
      "loss": 0.3086,
      "step": 4964
    },
    {
      "epoch": 0.062,
      "grad_norm": 0.5655360221862793,
      "learning_rate": 1.4925510940844157e-09,
      "loss": 0.3333,
      "step": 4965
    },
    {
      "epoch": 0.0622,
      "grad_norm": 0.43715015053749084,
      "learning_rate": 1.4084848164763742e-09,
      "loss": 0.3598,
      "step": 4966
    },
    {
      "epoch": 0.0624,
      "grad_norm": 0.8096640110015869,
      "learning_rate": 1.3268547916495124e-09,
      "loss": 0.2991,
      "step": 4967
    },
    {
      "epoch": 0.0626,
      "grad_norm": 0.428720623254776,
      "learning_rate": 1.247661059389227e-09,
      "loss": 0.3203,
      "step": 4968
    },
    {
      "epoch": 0.0628,
      "grad_norm": 0.45139968395233154,
      "learning_rate": 1.170903658293532e-09,
      "loss": 0.3214,
      "step": 4969
    },
    {
      "epoch": 0.063,
      "grad_norm": 0.4213252365589142,
      "learning_rate": 1.096582625772502e-09,
      "loss": 0.3416,
      "step": 4970
    },
    {
      "epoch": 0.0632,
      "grad_norm": 0.6678926348686218,
      "learning_rate": 1.0246979980499395e-09,
      "loss": 0.3021,
      "step": 4971
    },
    {
      "epoch": 0.0634,
      "grad_norm": 0.39185020327568054,
      "learning_rate": 9.55249810161152e-10,
      "loss": 0.3324,
      "step": 4972
    },
    {
      "epoch": 0.0636,
      "grad_norm": 0.4188025891780853,
      "learning_rate": 8.88238095955174e-10,
      "loss": 0.3227,
      "step": 4973
    },
    {
      "epoch": 0.0638,
      "grad_norm": 0.5595338344573975,
      "learning_rate": 8.236628880914365e-10,
      "loss": 0.317,
      "step": 4974
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.5305880308151245,
      "learning_rate": 7.615242180436521e-10,
      "loss": 0.3479,
      "step": 4975
    },
    {
      "epoch": 0.0642,
      "grad_norm": 0.36694785952568054,
      "learning_rate": 7.018221160981498e-10,
      "loss": 0.3028,
      "step": 4976
    },
    {
      "epoch": 0.0644,
      "grad_norm": 0.45368918776512146,
      "learning_rate": 6.445566113516544e-10,
      "loss": 0.3449,
      "step": 4977
    },
    {
      "epoch": 0.0646,
      "grad_norm": 0.564319908618927,
      "learning_rate": 5.897277317157279e-10,
      "loss": 0.3027,
      "step": 4978
    },
    {
      "epoch": 0.0648,
      "grad_norm": 0.35472235083580017,
      "learning_rate": 5.373355039128836e-10,
      "loss": 0.2952,
      "step": 4979
    },
    {
      "epoch": 0.065,
      "grad_norm": 0.41132456064224243,
      "learning_rate": 4.87379953478806e-10,
      "loss": 0.3367,
      "step": 4980
    },
    {
      "epoch": 0.0652,
      "grad_norm": 0.6116701364517212,
      "learning_rate": 4.398611047612411e-10,
      "loss": 0.3004,
      "step": 4981
    },
    {
      "epoch": 0.0654,
      "grad_norm": 0.41144222021102905,
      "learning_rate": 3.9477898091944135e-10,
      "loss": 0.3382,
      "step": 4982
    },
    {
      "epoch": 0.0656,
      "grad_norm": 0.43141859769821167,
      "learning_rate": 3.521336039263856e-10,
      "loss": 0.3395,
      "step": 4983
    },
    {
      "epoch": 0.0658,
      "grad_norm": 0.4901978671550751,
      "learning_rate": 3.1192499456766947e-10,
      "loss": 0.3364,
      "step": 4984
    },
    {
      "epoch": 0.066,
      "grad_norm": 0.3768532872200012,
      "learning_rate": 2.741531724392843e-10,
      "loss": 0.2824,
      "step": 4985
    },
    {
      "epoch": 0.0662,
      "grad_norm": 0.5490062832832336,
      "learning_rate": 2.388181559515035e-10,
      "loss": 0.3206,
      "step": 4986
    },
    {
      "epoch": 0.0664,
      "grad_norm": 0.3662363588809967,
      "learning_rate": 2.0591996232610656e-10,
      "loss": 0.2874,
      "step": 4987
    },
    {
      "epoch": 0.0666,
      "grad_norm": 0.4574631154537201,
      "learning_rate": 1.7545860759693446e-10,
      "loss": 0.3434,
      "step": 4988
    },
    {
      "epoch": 0.0668,
      "grad_norm": 0.48544660210609436,
      "learning_rate": 1.4743410661044454e-10,
      "loss": 0.3503,
      "step": 4989
    },
    {
      "epoch": 0.067,
      "grad_norm": 0.4120025634765625,
      "learning_rate": 1.2184647302626585e-10,
      "loss": 0.3769,
      "step": 4990
    },
    {
      "epoch": 0.0672,
      "grad_norm": 1.5022995471954346,
      "learning_rate": 9.869571931442334e-11,
      "loss": 0.3176,
      "step": 4991
    },
    {
      "epoch": 0.0674,
      "grad_norm": 0.5623800158500671,
      "learning_rate": 7.798185675866876e-11,
      "loss": 0.3568,
      "step": 4992
    },
    {
      "epoch": 0.0676,
      "grad_norm": 0.4185653030872345,
      "learning_rate": 5.970489545537028e-11,
      "loss": 0.3311,
      "step": 4993
    },
    {
      "epoch": 0.0678,
      "grad_norm": 0.47084739804267883,
      "learning_rate": 4.3864844311847235e-11,
      "loss": 0.3294,
      "step": 4994
    },
    {
      "epoch": 0.068,
      "grad_norm": 0.37336456775665283,
      "learning_rate": 3.0461711048035415e-11,
      "loss": 0.2852,
      "step": 4995
    },
    {
      "epoch": 0.0682,
      "grad_norm": 0.47971728444099426,
      "learning_rate": 1.9495502197042214e-11,
      "loss": 0.3635,
      "step": 4996
    },
    {
      "epoch": 0.0684,
      "grad_norm": 0.5644340515136719,
      "learning_rate": 1.0966223103481278e-11,
      "loss": 0.2803,
      "step": 4997
    },
    {
      "epoch": 0.0686,
      "grad_norm": 0.6819469928741455,
      "learning_rate": 4.873877924582715e-12,
      "loss": 0.3181,
      "step": 4998
    },
    {
      "epoch": 0.0688,
      "grad_norm": 0.4005674123764038,
      "learning_rate": 1.2184696296380083e-12,
      "loss": 0.3021,
      "step": 4999
    },
    {
      "epoch": 0.069,
      "grad_norm": 0.46513688564300537,
      "learning_rate": 0.0,
      "loss": 0.2968,
      "step": 5000
    },
    {
      "epoch": 0.069,
      "step": 5000,
      "total_flos": 5.6478980657499236e+20,
      "train_loss": 0.02277738807797432,
      "train_runtime": 3535.7374,
      "train_samples_per_second": 362.018,
      "train_steps_per_second": 1.414
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.6478980657499236e+20,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
