| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 39.95179987797437, |
| "eval_steps": 100.0, |
| "global_step": 32760, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.6101281269066504, |
| "grad_norm": 41.414833068847656, |
| "learning_rate": 1.188e-06, |
| "loss": 21.6118, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_cer": 1.1283783783783783, |
| "eval_loss": 8.950940132141113, |
| "eval_runtime": 87.6114, |
| "eval_samples_per_second": 78.072, |
| "eval_steps_per_second": 9.759, |
| "eval_wer": 1.0, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.2196461256863942, |
| "grad_norm": 35.01482009887695, |
| "learning_rate": 2.3880000000000003e-06, |
| "loss": 9.1302, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.8297742525930445, |
| "grad_norm": 27.13286590576172, |
| "learning_rate": 3.588e-06, |
| "loss": 7.4878, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_cer": 1.1284376481744902, |
| "eval_loss": 6.385559558868408, |
| "eval_runtime": 76.6753, |
| "eval_samples_per_second": 89.207, |
| "eval_steps_per_second": 11.151, |
| "eval_wer": 1.0, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.4392922513727884, |
| "grad_norm": 13.3228120803833, |
| "learning_rate": 4.788e-06, |
| "loss": 5.9004, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_cer": 1.1284376481744902, |
| "eval_loss": 3.928687572479248, |
| "eval_runtime": 74.707, |
| "eval_samples_per_second": 91.558, |
| "eval_steps_per_second": 11.445, |
| "eval_wer": 1.0, |
| "step": 2460 |
| }, |
| { |
| "epoch": 3.048810250152532, |
| "grad_norm": 6.096343994140625, |
| "learning_rate": 5.988e-06, |
| "loss": 4.3944, |
| "step": 2500 |
| }, |
| { |
| "epoch": 3.6589383770591826, |
| "grad_norm": 3.3120861053466797, |
| "learning_rate": 7.1880000000000005e-06, |
| "loss": 3.4882, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_cer": 1.1284376481744902, |
| "eval_loss": 2.909001588821411, |
| "eval_runtime": 77.6836, |
| "eval_samples_per_second": 88.05, |
| "eval_steps_per_second": 11.006, |
| "eval_wer": 1.0, |
| "step": 3280 |
| }, |
| { |
| "epoch": 4.268456375838926, |
| "grad_norm": 1.7695776224136353, |
| "learning_rate": 8.388e-06, |
| "loss": 2.963, |
| "step": 3500 |
| }, |
| { |
| "epoch": 4.878584502745577, |
| "grad_norm": 1.4808818101882935, |
| "learning_rate": 9.588e-06, |
| "loss": 2.6365, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_cer": 1.1284376481744902, |
| "eval_loss": 2.3862359523773193, |
| "eval_runtime": 88.7308, |
| "eval_samples_per_second": 77.087, |
| "eval_steps_per_second": 9.636, |
| "eval_wer": 1.0, |
| "step": 4100 |
| }, |
| { |
| "epoch": 5.48810250152532, |
| "grad_norm": 5.100676536560059, |
| "learning_rate": 1.0787999999999999e-05, |
| "loss": 2.2815, |
| "step": 4500 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_cer": 1.2580903271692745, |
| "eval_loss": 1.4563010931015015, |
| "eval_runtime": 88.3297, |
| "eval_samples_per_second": 77.437, |
| "eval_steps_per_second": 9.68, |
| "eval_wer": 1.0, |
| "step": 4920 |
| }, |
| { |
| "epoch": 6.097620500305064, |
| "grad_norm": 2.4356448650360107, |
| "learning_rate": 1.1988000000000001e-05, |
| "loss": 1.7295, |
| "step": 5000 |
| }, |
| { |
| "epoch": 6.707748627211714, |
| "grad_norm": 4.461264133453369, |
| "learning_rate": 1.3188e-05, |
| "loss": 1.0892, |
| "step": 5500 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_cer": 1.0564248458985301, |
| "eval_loss": 0.42875832319259644, |
| "eval_runtime": 102.6377, |
| "eval_samples_per_second": 66.642, |
| "eval_steps_per_second": 8.33, |
| "eval_wer": 0.9998538011695907, |
| "step": 5740 |
| }, |
| { |
| "epoch": 7.317266625991458, |
| "grad_norm": 1.982809066772461, |
| "learning_rate": 1.4388000000000002e-05, |
| "loss": 0.6362, |
| "step": 6000 |
| }, |
| { |
| "epoch": 7.927394752898109, |
| "grad_norm": 2.818439483642578, |
| "learning_rate": 1.5588e-05, |
| "loss": 0.4741, |
| "step": 6500 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_cer": 1.0188477951635846, |
| "eval_loss": 0.18962983787059784, |
| "eval_runtime": 91.2741, |
| "eval_samples_per_second": 74.939, |
| "eval_steps_per_second": 9.367, |
| "eval_wer": 0.9994152046783625, |
| "step": 6560 |
| }, |
| { |
| "epoch": 8.536912751677852, |
| "grad_norm": 11.691442489624023, |
| "learning_rate": 1.6788e-05, |
| "loss": 0.3822, |
| "step": 7000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_cer": 1.0211593172119489, |
| "eval_loss": 0.17385753989219666, |
| "eval_runtime": 89.6334, |
| "eval_samples_per_second": 76.311, |
| "eval_steps_per_second": 9.539, |
| "eval_wer": 0.9989766081871345, |
| "step": 7380 |
| }, |
| { |
| "epoch": 9.146430750457595, |
| "grad_norm": 5.473918914794922, |
| "learning_rate": 1.7988e-05, |
| "loss": 0.34, |
| "step": 7500 |
| }, |
| { |
| "epoch": 9.756558877364247, |
| "grad_norm": 3.1958296298980713, |
| "learning_rate": 1.9188e-05, |
| "loss": 0.3101, |
| "step": 8000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_cer": 1.0287754860123282, |
| "eval_loss": 0.17125986516475677, |
| "eval_runtime": 101.7075, |
| "eval_samples_per_second": 67.252, |
| "eval_steps_per_second": 8.406, |
| "eval_wer": 0.9988304093567252, |
| "step": 8200 |
| }, |
| { |
| "epoch": 10.36607687614399, |
| "grad_norm": 3.826345443725586, |
| "learning_rate": 2.0388e-05, |
| "loss": 0.2837, |
| "step": 8500 |
| }, |
| { |
| "epoch": 10.97620500305064, |
| "grad_norm": 4.758739471435547, |
| "learning_rate": 2.1588e-05, |
| "loss": 0.2644, |
| "step": 9000 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_cer": 1.0217816500711236, |
| "eval_loss": 0.11893380433320999, |
| "eval_runtime": 97.2791, |
| "eval_samples_per_second": 70.313, |
| "eval_steps_per_second": 8.789, |
| "eval_wer": 0.9988304093567252, |
| "step": 9020 |
| }, |
| { |
| "epoch": 11.585723001830385, |
| "grad_norm": 5.496431350708008, |
| "learning_rate": 2.2788000000000003e-05, |
| "loss": 0.2476, |
| "step": 9500 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_cer": 1.0172178757705073, |
| "eval_loss": 0.05396101996302605, |
| "eval_runtime": 100.4809, |
| "eval_samples_per_second": 68.073, |
| "eval_steps_per_second": 8.509, |
| "eval_wer": 0.9988304093567252, |
| "step": 9840 |
| }, |
| { |
| "epoch": 12.195241000610128, |
| "grad_norm": 3.7265303134918213, |
| "learning_rate": 2.3988e-05, |
| "loss": 0.2479, |
| "step": 10000 |
| }, |
| { |
| "epoch": 12.805369127516778, |
| "grad_norm": 1.958551287651062, |
| "learning_rate": 2.5188e-05, |
| "loss": 0.2302, |
| "step": 10500 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_cer": 1.0152027027027026, |
| "eval_loss": 0.029684651643037796, |
| "eval_runtime": 80.6829, |
| "eval_samples_per_second": 84.776, |
| "eval_steps_per_second": 10.597, |
| "eval_wer": 0.9988304093567252, |
| "step": 10660 |
| }, |
| { |
| "epoch": 13.414887126296522, |
| "grad_norm": 7.186318397521973, |
| "learning_rate": 2.6388000000000002e-05, |
| "loss": 0.2182, |
| "step": 11000 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_cer": 1.0188477951635846, |
| "eval_loss": 0.043104566633701324, |
| "eval_runtime": 66.2916, |
| "eval_samples_per_second": 103.18, |
| "eval_steps_per_second": 12.898, |
| "eval_wer": 0.9988304093567252, |
| "step": 11480 |
| }, |
| { |
| "epoch": 14.024405125076266, |
| "grad_norm": 5.963690757751465, |
| "learning_rate": 2.7588e-05, |
| "loss": 0.2228, |
| "step": 11500 |
| }, |
| { |
| "epoch": 14.634533251982916, |
| "grad_norm": 5.593617916107178, |
| "learning_rate": 2.8788e-05, |
| "loss": 0.2154, |
| "step": 12000 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_cer": 1.0157657657657657, |
| "eval_loss": 0.017413927242159843, |
| "eval_runtime": 61.3441, |
| "eval_samples_per_second": 111.502, |
| "eval_steps_per_second": 13.938, |
| "eval_wer": 0.9988304093567252, |
| "step": 12300 |
| }, |
| { |
| "epoch": 15.24405125076266, |
| "grad_norm": 3.418288230895996, |
| "learning_rate": 2.99856e-05, |
| "loss": 0.22, |
| "step": 12500 |
| }, |
| { |
| "epoch": 15.854179377669311, |
| "grad_norm": 5.651284694671631, |
| "learning_rate": 2.9956190887883116e-05, |
| "loss": 0.2072, |
| "step": 13000 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_cer": 1.0153805120910384, |
| "eval_loss": 0.015717793256044388, |
| "eval_runtime": 60.2687, |
| "eval_samples_per_second": 113.492, |
| "eval_steps_per_second": 14.186, |
| "eval_wer": 0.9991228070175439, |
| "step": 13120 |
| }, |
| { |
| "epoch": 16.463697376449055, |
| "grad_norm": 4.6377177238464355, |
| "learning_rate": 2.982253104799521e-05, |
| "loss": 0.1986, |
| "step": 13500 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_cer": 1.0149359886201992, |
| "eval_loss": 0.027271753177046776, |
| "eval_runtime": 61.2915, |
| "eval_samples_per_second": 111.598, |
| "eval_steps_per_second": 13.95, |
| "eval_wer": 0.9989766081871345, |
| "step": 13940 |
| }, |
| { |
| "epoch": 17.0732153752288, |
| "grad_norm": 0.13217875361442566, |
| "learning_rate": 2.9599814696946643e-05, |
| "loss": 0.2056, |
| "step": 14000 |
| }, |
| { |
| "epoch": 17.683343502135447, |
| "grad_norm": 0.8718374371528625, |
| "learning_rate": 2.9289379955813937e-05, |
| "loss": 0.1919, |
| "step": 14500 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_cer": 1.014461830251304, |
| "eval_loss": 0.01104104146361351, |
| "eval_runtime": 61.3175, |
| "eval_samples_per_second": 111.551, |
| "eval_steps_per_second": 13.944, |
| "eval_wer": 0.9988304093567252, |
| "step": 14760 |
| }, |
| { |
| "epoch": 18.29286150091519, |
| "grad_norm": 6.6985273361206055, |
| "learning_rate": 2.8893091974003682e-05, |
| "loss": 0.1776, |
| "step": 15000 |
| }, |
| { |
| "epoch": 18.902989627821842, |
| "grad_norm": 6.272310733795166, |
| "learning_rate": 2.841333172308954e-05, |
| "loss": 0.1763, |
| "step": 15500 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_cer": 1.014432195353248, |
| "eval_loss": 0.014498263597488403, |
| "eval_runtime": 61.4346, |
| "eval_samples_per_second": 111.338, |
| "eval_steps_per_second": 13.917, |
| "eval_wer": 0.9988304093567252, |
| "step": 15580 |
| }, |
| { |
| "epoch": 19.512507626601586, |
| "grad_norm": 1.4895048141479492, |
| "learning_rate": 2.785418066112353e-05, |
| "loss": 0.1759, |
| "step": 16000 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_cer": 1.016714082503556, |
| "eval_loss": 0.07009146362543106, |
| "eval_runtime": 62.172, |
| "eval_samples_per_second": 110.017, |
| "eval_steps_per_second": 13.752, |
| "eval_wer": 0.9988304093567252, |
| "step": 16400 |
| }, |
| { |
| "epoch": 20.12202562538133, |
| "grad_norm": 4.595536231994629, |
| "learning_rate": 2.7216758309791792e-05, |
| "loss": 0.1829, |
| "step": 16500 |
| }, |
| { |
| "epoch": 20.73215375228798, |
| "grad_norm": 6.24274206161499, |
| "learning_rate": 2.6505935412410244e-05, |
| "loss": 0.1673, |
| "step": 17000 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_cer": 1.0136024182076813, |
| "eval_loss": 0.012840056791901588, |
| "eval_runtime": 62.0714, |
| "eval_samples_per_second": 110.196, |
| "eval_steps_per_second": 13.774, |
| "eval_wer": 0.9988304093567252, |
| "step": 17220 |
| }, |
| { |
| "epoch": 21.341671751067725, |
| "grad_norm": 4.522493362426758, |
| "learning_rate": 2.5725982724566367e-05, |
| "loss": 0.162, |
| "step": 17500 |
| }, |
| { |
| "epoch": 21.951799877974373, |
| "grad_norm": 6.085182189941406, |
| "learning_rate": 2.4881586346429215e-05, |
| "loss": 0.157, |
| "step": 18000 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_cer": 1.014432195353248, |
| "eval_loss": 0.013646350242197514, |
| "eval_runtime": 61.3925, |
| "eval_samples_per_second": 111.414, |
| "eval_steps_per_second": 13.927, |
| "eval_wer": 0.9988304093567252, |
| "step": 18040 |
| }, |
| { |
| "epoch": 22.561317876754117, |
| "grad_norm": 0.7235033512115479, |
| "learning_rate": 2.3977819567791885e-05, |
| "loss": 0.1642, |
| "step": 18500 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_cer": 1.0074383594120435, |
| "eval_loss": 0.1374504715204239, |
| "eval_runtime": 59.8534, |
| "eval_samples_per_second": 114.279, |
| "eval_steps_per_second": 14.285, |
| "eval_wer": 0.9988304093567252, |
| "step": 18860 |
| }, |
| { |
| "epoch": 23.17083587553386, |
| "grad_norm": 1.7966482639312744, |
| "learning_rate": 2.3022077859705676e-05, |
| "loss": 0.155, |
| "step": 19000 |
| }, |
| { |
| "epoch": 23.780964002440513, |
| "grad_norm": 4.931192874908447, |
| "learning_rate": 2.2016274790151287e-05, |
| "loss": 0.1529, |
| "step": 19500 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_cer": 1.0139580369843528, |
| "eval_loss": 0.013449819758534431, |
| "eval_runtime": 58.9726, |
| "eval_samples_per_second": 115.986, |
| "eval_steps_per_second": 14.498, |
| "eval_wer": 0.9988304093567252, |
| "step": 19680 |
| }, |
| { |
| "epoch": 24.390482001220256, |
| "grad_norm": 0.1271979659795761, |
| "learning_rate": 2.0968316642484253e-05, |
| "loss": 0.1501, |
| "step": 20000 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 3.955129623413086, |
| "learning_rate": 1.9884499743301647e-05, |
| "loss": 0.1511, |
| "step": 20500 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_cer": 1.013839497392129, |
| "eval_loss": 0.007342994213104248, |
| "eval_runtime": 61.5659, |
| "eval_samples_per_second": 111.101, |
| "eval_steps_per_second": 13.888, |
| "eval_wer": 0.9989766081871345, |
| "step": 20500 |
| }, |
| { |
| "epoch": 25.610128126906652, |
| "grad_norm": 4.402960300445557, |
| "learning_rate": 1.8771335865219483e-05, |
| "loss": 0.1415, |
| "step": 21000 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_cer": 1.0136320531057372, |
| "eval_loss": 0.006196827162057161, |
| "eval_runtime": 63.0154, |
| "eval_samples_per_second": 108.545, |
| "eval_steps_per_second": 13.568, |
| "eval_wer": 0.9988304093567252, |
| "step": 21320 |
| }, |
| { |
| "epoch": 26.219646125686396, |
| "grad_norm": 5.023196220397949, |
| "learning_rate": 1.7635513102937044e-05, |
| "loss": 0.1329, |
| "step": 21500 |
| }, |
| { |
| "epoch": 26.829774252593044, |
| "grad_norm": 4.451430797576904, |
| "learning_rate": 1.6483855689925534e-05, |
| "loss": 0.1338, |
| "step": 22000 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_cer": 1.0135135135135136, |
| "eval_loss": 0.006267285440117121, |
| "eval_runtime": 62.0749, |
| "eval_samples_per_second": 110.189, |
| "eval_steps_per_second": 13.774, |
| "eval_wer": 0.9988304093567252, |
| "step": 22140 |
| }, |
| { |
| "epoch": 27.439292251372787, |
| "grad_norm": 0.7792350649833679, |
| "learning_rate": 1.5325608410059234e-05, |
| "loss": 0.1373, |
| "step": 22500 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_cer": 1.01309862494073, |
| "eval_loss": 0.013936175964772701, |
| "eval_runtime": 59.8236, |
| "eval_samples_per_second": 114.336, |
| "eval_steps_per_second": 14.292, |
| "eval_wer": 0.9988304093567252, |
| "step": 22960 |
| }, |
| { |
| "epoch": 28.04881025015253, |
| "grad_norm": 1.8355393409729004, |
| "learning_rate": 1.4163090284146517e-05, |
| "loss": 0.128, |
| "step": 23000 |
| }, |
| { |
| "epoch": 28.658938377059183, |
| "grad_norm": 1.9466981887817383, |
| "learning_rate": 1.3005600466773616e-05, |
| "loss": 0.1224, |
| "step": 23500 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_cer": 1.0135135135135136, |
| "eval_loss": 0.0075889285653829575, |
| "eval_runtime": 61.2053, |
| "eval_samples_per_second": 111.755, |
| "eval_steps_per_second": 13.969, |
| "eval_wer": 0.9988304093567252, |
| "step": 23780 |
| }, |
| { |
| "epoch": 29.268456375838927, |
| "grad_norm": 2.3899123668670654, |
| "learning_rate": 1.186009337109073e-05, |
| "loss": 0.1245, |
| "step": 24000 |
| }, |
| { |
| "epoch": 29.878584502745575, |
| "grad_norm": 0.28489622473716736, |
| "learning_rate": 1.0733451415837331e-05, |
| "loss": 0.1217, |
| "step": 24500 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_cer": 1.013869132290185, |
| "eval_loss": 0.01911773718893528, |
| "eval_runtime": 61.6884, |
| "eval_samples_per_second": 110.88, |
| "eval_steps_per_second": 13.86, |
| "eval_wer": 0.9988304093567252, |
| "step": 24600 |
| }, |
| { |
| "epoch": 30.48810250152532, |
| "grad_norm": 6.990693092346191, |
| "learning_rate": 9.632443674496023e-06, |
| "loss": 0.119, |
| "step": 25000 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_cer": 1.0134542437174017, |
| "eval_loss": 0.026616927236318588, |
| "eval_runtime": 61.1134, |
| "eval_samples_per_second": 111.923, |
| "eval_steps_per_second": 13.99, |
| "eval_wer": 0.9988304093567252, |
| "step": 25420 |
| }, |
| { |
| "epoch": 31.097620500305062, |
| "grad_norm": 1.3103210926055908, |
| "learning_rate": 8.563685205445662e-06, |
| "loss": 0.1102, |
| "step": 25500 |
| }, |
| { |
| "epoch": 31.707748627211714, |
| "grad_norm": 3.154486656188965, |
| "learning_rate": 7.533597307465705e-06, |
| "loss": 0.1122, |
| "step": 26000 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_cer": 1.0135727833096255, |
| "eval_loss": 0.006075156386941671, |
| "eval_runtime": 58.6707, |
| "eval_samples_per_second": 116.583, |
| "eval_steps_per_second": 14.573, |
| "eval_wer": 0.9988304093567252, |
| "step": 26240 |
| }, |
| { |
| "epoch": 32.31726662599146, |
| "grad_norm": 0.17011937499046326, |
| "learning_rate": 6.550290643366546e-06, |
| "loss": 0.11, |
| "step": 26500 |
| }, |
| { |
| "epoch": 32.92739475289811, |
| "grad_norm": 0.06195596233010292, |
| "learning_rate": 5.615733971162722e-06, |
| "loss": 0.1077, |
| "step": 27000 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_cer": 1.0133949739212897, |
| "eval_loss": 0.00502835214138031, |
| "eval_runtime": 61.3153, |
| "eval_samples_per_second": 111.555, |
| "eval_steps_per_second": 13.944, |
| "eval_wer": 0.9988304093567252, |
| "step": 27060 |
| }, |
| { |
| "epoch": 33.53691275167785, |
| "grad_norm": 5.214883804321289, |
| "learning_rate": 4.737559706904321e-06, |
| "loss": 0.1058, |
| "step": 27500 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_cer": 1.0134838786154576, |
| "eval_loss": 0.0068115307949483395, |
| "eval_runtime": 61.475, |
| "eval_samples_per_second": 111.265, |
| "eval_steps_per_second": 13.908, |
| "eval_wer": 0.9988304093567252, |
| "step": 27880 |
| }, |
| { |
| "epoch": 34.1464307504576, |
| "grad_norm": 0.43510904908180237, |
| "learning_rate": 3.921044084178765e-06, |
| "loss": 0.1088, |
| "step": 28000 |
| }, |
| { |
| "epoch": 34.75655887736425, |
| "grad_norm": 1.829236388206482, |
| "learning_rate": 3.1725232868909293e-06, |
| "loss": 0.0992, |
| "step": 28500 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_cer": 1.0135431484115696, |
| "eval_loss": 0.005784249398857355, |
| "eval_runtime": 61.4985, |
| "eval_samples_per_second": 111.222, |
| "eval_steps_per_second": 13.903, |
| "eval_wer": 0.9988304093567252, |
| "step": 28700 |
| }, |
| { |
| "epoch": 35.36607687614399, |
| "grad_norm": 2.7739064693450928, |
| "learning_rate": 2.493495989231198e-06, |
| "loss": 0.1082, |
| "step": 29000 |
| }, |
| { |
| "epoch": 35.97620500305064, |
| "grad_norm": 3.2704861164093018, |
| "learning_rate": 1.8896100834437107e-06, |
| "loss": 0.0977, |
| "step": 29500 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_cer": 1.0134542437174017, |
| "eval_loss": 0.006506683304905891, |
| "eval_runtime": 60.4558, |
| "eval_samples_per_second": 113.14, |
| "eval_steps_per_second": 14.143, |
| "eval_wer": 0.9988304093567252, |
| "step": 29520 |
| }, |
| { |
| "epoch": 36.58572300183038, |
| "grad_norm": 1.9117754697799683, |
| "learning_rate": 1.3644938278693997e-06, |
| "loss": 0.093, |
| "step": 30000 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_cer": 1.0133060692271219, |
| "eval_loss": 0.005826306063681841, |
| "eval_runtime": 61.6467, |
| "eval_samples_per_second": 110.955, |
| "eval_steps_per_second": 13.869, |
| "eval_wer": 0.9988304093567252, |
| "step": 30340 |
| }, |
| { |
| "epoch": 37.195241000610125, |
| "grad_norm": 0.03339027985930443, |
| "learning_rate": 9.213022182052699e-07, |
| "loss": 0.099, |
| "step": 30500 |
| }, |
| { |
| "epoch": 37.80536912751678, |
| "grad_norm": 0.10268145054578781, |
| "learning_rate": 5.626980317060648e-07, |
| "loss": 0.0959, |
| "step": 31000 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_cer": 1.013276434329066, |
| "eval_loss": 0.005811047740280628, |
| "eval_runtime": 60.4371, |
| "eval_samples_per_second": 113.175, |
| "eval_steps_per_second": 14.147, |
| "eval_wer": 0.9988304093567252, |
| "step": 31160 |
| }, |
| { |
| "epoch": 38.41488712629652, |
| "grad_norm": 1.405590534210205, |
| "learning_rate": 2.912918111057888e-07, |
| "loss": 0.093, |
| "step": 31500 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_cer": 1.0133060692271219, |
| "eval_loss": 0.005707182455807924, |
| "eval_runtime": 57.274, |
| "eval_samples_per_second": 119.426, |
| "eval_steps_per_second": 14.928, |
| "eval_wer": 0.9988304093567252, |
| "step": 31980 |
| }, |
| { |
| "epoch": 39.024405125076264, |
| "grad_norm": 0.9183106422424316, |
| "learning_rate": 1.0762696080869105e-07, |
| "loss": 0.0983, |
| "step": 32000 |
| }, |
| { |
| "epoch": 39.634533251982916, |
| "grad_norm": 15.433984756469727, |
| "learning_rate": 1.3536859442666582e-08, |
| "loss": 0.0951, |
| "step": 32500 |
| }, |
| { |
| "epoch": 39.95179987797437, |
| "eval_cer": 1.0133357041251778, |
| "eval_loss": 0.005610902328044176, |
| "eval_runtime": 56.5957, |
| "eval_samples_per_second": 120.857, |
| "eval_steps_per_second": 15.107, |
| "eval_wer": 0.9988304093567252, |
| "step": 32760 |
| }, |
| { |
| "epoch": 39.95179987797437, |
| "step": 32760, |
| "total_flos": 1.3506323652949156e+19, |
| "train_loss": 1.1069419495788686, |
| "train_runtime": 26304.0609, |
| "train_samples_per_second": 79.744, |
| "train_steps_per_second": 1.245 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 32760, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 40, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3506323652949156e+19, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|