{
"best_global_step": 2838,
"best_metric": 0.9269727168763274,
"best_model_checkpoint": "./results/checkpoint-2838",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2838,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.052854122621564484,
"grad_norm": 7.628295421600342,
"learning_rate": 1.9654686398872448e-05,
"loss": 0.3925,
"step": 50
},
{
"epoch": 0.10570824524312897,
"grad_norm": 12.30865478515625,
"learning_rate": 1.9302325581395353e-05,
"loss": 0.3431,
"step": 100
},
{
"epoch": 0.15856236786469344,
"grad_norm": 14.237386703491211,
"learning_rate": 1.8949964763918254e-05,
"loss": 0.3237,
"step": 150
},
{
"epoch": 0.21141649048625794,
"grad_norm": 28.253515243530273,
"learning_rate": 1.8597603946441155e-05,
"loss": 0.3131,
"step": 200
},
{
"epoch": 0.2642706131078224,
"grad_norm": 7.577988624572754,
"learning_rate": 1.824524312896406e-05,
"loss": 0.3027,
"step": 250
},
{
"epoch": 0.3171247357293869,
"grad_norm": 12.410346031188965,
"learning_rate": 1.7892882311486964e-05,
"loss": 0.2873,
"step": 300
},
{
"epoch": 0.3699788583509514,
"grad_norm": 17.205244064331055,
"learning_rate": 1.754052149400987e-05,
"loss": 0.3037,
"step": 350
},
{
"epoch": 0.42283298097251587,
"grad_norm": 15.440715789794922,
"learning_rate": 1.718816067653277e-05,
"loss": 0.2388,
"step": 400
},
{
"epoch": 0.47568710359408034,
"grad_norm": 18.95528793334961,
"learning_rate": 1.6835799859055675e-05,
"loss": 0.2834,
"step": 450
},
{
"epoch": 0.5285412262156448,
"grad_norm": 11.796369552612305,
"learning_rate": 1.6483439041578576e-05,
"loss": 0.282,
"step": 500
},
{
"epoch": 0.5813953488372093,
"grad_norm": 16.309091567993164,
"learning_rate": 1.613107822410148e-05,
"loss": 0.2861,
"step": 550
},
{
"epoch": 0.6342494714587738,
"grad_norm": 34.3432731628418,
"learning_rate": 1.5778717406624385e-05,
"loss": 0.295,
"step": 600
},
{
"epoch": 0.6871035940803383,
"grad_norm": 7.3598151206970215,
"learning_rate": 1.542635658914729e-05,
"loss": 0.2786,
"step": 650
},
{
"epoch": 0.7399577167019028,
"grad_norm": 19.721378326416016,
"learning_rate": 1.5073995771670191e-05,
"loss": 0.2627,
"step": 700
},
{
"epoch": 0.7928118393234672,
"grad_norm": 6.87978982925415,
"learning_rate": 1.4721634954193096e-05,
"loss": 0.274,
"step": 750
},
{
"epoch": 0.8456659619450317,
"grad_norm": 23.790388107299805,
"learning_rate": 1.4369274136715999e-05,
"loss": 0.2477,
"step": 800
},
{
"epoch": 0.8985200845665962,
"grad_norm": 25.269287109375,
"learning_rate": 1.4016913319238903e-05,
"loss": 0.2559,
"step": 850
},
{
"epoch": 0.9513742071881607,
"grad_norm": 12.499388694763184,
"learning_rate": 1.3664552501761804e-05,
"loss": 0.2535,
"step": 900
},
{
"epoch": 1.0,
"eval_f1_macro": 0.8913938479590982,
"eval_f1_micro": 0.8905597326649958,
"eval_loss": 0.23979686200618744,
"eval_precision": 0.9089358799454298,
"eval_recall": 0.872911889944317,
"eval_runtime": 10.313,
"eval_samples_per_second": 366.721,
"eval_steps_per_second": 22.981,
"step": 946
},
{
"epoch": 1.0042283298097252,
"grad_norm": 20.737754821777344,
"learning_rate": 1.3312191684284707e-05,
"loss": 0.2342,
"step": 950
},
{
"epoch": 1.0570824524312896,
"grad_norm": 8.743599891662598,
"learning_rate": 1.2959830866807612e-05,
"loss": 0.1932,
"step": 1000
},
{
"epoch": 1.109936575052854,
"grad_norm": 15.532188415527344,
"learning_rate": 1.2607470049330515e-05,
"loss": 0.1546,
"step": 1050
},
{
"epoch": 1.1627906976744187,
"grad_norm": 10.706155776977539,
"learning_rate": 1.225510923185342e-05,
"loss": 0.2058,
"step": 1100
},
{
"epoch": 1.215644820295983,
"grad_norm": 6.785822868347168,
"learning_rate": 1.1902748414376322e-05,
"loss": 0.1603,
"step": 1150
},
{
"epoch": 1.2684989429175475,
"grad_norm": 19.18027687072754,
"learning_rate": 1.1550387596899227e-05,
"loss": 0.17,
"step": 1200
},
{
"epoch": 1.3213530655391121,
"grad_norm": 5.942857265472412,
"learning_rate": 1.1198026779422128e-05,
"loss": 0.2369,
"step": 1250
},
{
"epoch": 1.3742071881606766,
"grad_norm": 3.4256839752197266,
"learning_rate": 1.0845665961945033e-05,
"loss": 0.1843,
"step": 1300
},
{
"epoch": 1.427061310782241,
"grad_norm": 21.69367218017578,
"learning_rate": 1.0493305144467936e-05,
"loss": 0.1798,
"step": 1350
},
{
"epoch": 1.4799154334038054,
"grad_norm": 12.499094009399414,
"learning_rate": 1.014094432699084e-05,
"loss": 0.2137,
"step": 1400
},
{
"epoch": 1.53276955602537,
"grad_norm": 9.75059700012207,
"learning_rate": 9.788583509513743e-06,
"loss": 0.1616,
"step": 1450
},
{
"epoch": 1.5856236786469344,
"grad_norm": 16.770835876464844,
"learning_rate": 9.436222692036646e-06,
"loss": 0.1711,
"step": 1500
},
{
"epoch": 1.638477801268499,
"grad_norm": 7.7109222412109375,
"learning_rate": 9.083861874559549e-06,
"loss": 0.1757,
"step": 1550
},
{
"epoch": 1.6913319238900635,
"grad_norm": 8.317294120788574,
"learning_rate": 8.731501057082454e-06,
"loss": 0.1886,
"step": 1600
},
{
"epoch": 1.744186046511628,
"grad_norm": 7.418989181518555,
"learning_rate": 8.379140239605357e-06,
"loss": 0.1743,
"step": 1650
},
{
"epoch": 1.7970401691331923,
"grad_norm": 15.134026527404785,
"learning_rate": 8.02677942212826e-06,
"loss": 0.1829,
"step": 1700
},
{
"epoch": 1.8498942917547567,
"grad_norm": 8.40051555633545,
"learning_rate": 7.674418604651164e-06,
"loss": 0.1516,
"step": 1750
},
{
"epoch": 1.9027484143763214,
"grad_norm": 42.32719039916992,
"learning_rate": 7.322057787174067e-06,
"loss": 0.1574,
"step": 1800
},
{
"epoch": 1.955602536997886,
"grad_norm": 7.198770046234131,
"learning_rate": 6.969696969696971e-06,
"loss": 0.1796,
"step": 1850
},
{
"epoch": 2.0,
"eval_f1_macro": 0.9171441852345779,
"eval_f1_micro": 0.9165967444202048,
"eval_loss": 0.207748144865036,
"eval_precision": 0.9397797660013765,
"eval_recall": 0.8945299705207992,
"eval_runtime": 10.3145,
"eval_samples_per_second": 366.667,
"eval_steps_per_second": 22.977,
"step": 1892
},
{
"epoch": 2.0084566596194504,
"grad_norm": 3.685957193374634,
"learning_rate": 6.6173361522198745e-06,
"loss": 0.1328,
"step": 1900
},
{
"epoch": 2.061310782241015,
"grad_norm": 12.144611358642578,
"learning_rate": 6.2649753347427766e-06,
"loss": 0.1201,
"step": 1950
},
{
"epoch": 2.1141649048625792,
"grad_norm": 26.261823654174805,
"learning_rate": 5.91261451726568e-06,
"loss": 0.1465,
"step": 2000
},
{
"epoch": 2.1670190274841437,
"grad_norm": 42.77655029296875,
"learning_rate": 5.560253699788583e-06,
"loss": 0.1231,
"step": 2050
},
{
"epoch": 2.219873150105708,
"grad_norm": 10.951128959655762,
"learning_rate": 5.207892882311487e-06,
"loss": 0.1367,
"step": 2100
},
{
"epoch": 2.2727272727272725,
"grad_norm": 12.861650466918945,
"learning_rate": 4.855532064834391e-06,
"loss": 0.1056,
"step": 2150
},
{
"epoch": 2.3255813953488373,
"grad_norm": 16.26862144470215,
"learning_rate": 4.5031712473572945e-06,
"loss": 0.1259,
"step": 2200
},
{
"epoch": 2.3784355179704018,
"grad_norm": 11.81278133392334,
"learning_rate": 4.150810429880197e-06,
"loss": 0.1214,
"step": 2250
},
{
"epoch": 2.431289640591966,
"grad_norm": 1.1252022981643677,
"learning_rate": 3.798449612403101e-06,
"loss": 0.1017,
"step": 2300
},
{
"epoch": 2.4841437632135306,
"grad_norm": 11.047788619995117,
"learning_rate": 3.4460887949260045e-06,
"loss": 0.1263,
"step": 2350
},
{
"epoch": 2.536997885835095,
"grad_norm": 16.96929359436035,
"learning_rate": 3.0937279774489083e-06,
"loss": 0.0986,
"step": 2400
},
{
"epoch": 2.58985200845666,
"grad_norm": 2.476245164871216,
"learning_rate": 2.741367159971811e-06,
"loss": 0.1105,
"step": 2450
},
{
"epoch": 2.6427061310782243,
"grad_norm": 5.029143810272217,
"learning_rate": 2.389006342494715e-06,
"loss": 0.1219,
"step": 2500
},
{
"epoch": 2.6955602536997887,
"grad_norm": 16.73661231994629,
"learning_rate": 2.0366455250176183e-06,
"loss": 0.1087,
"step": 2550
},
{
"epoch": 2.748414376321353,
"grad_norm": 1.3909024000167847,
"learning_rate": 1.6842847075405216e-06,
"loss": 0.1118,
"step": 2600
},
{
"epoch": 2.8012684989429175,
"grad_norm": 14.585844993591309,
"learning_rate": 1.3319238900634251e-06,
"loss": 0.1018,
"step": 2650
},
{
"epoch": 2.854122621564482,
"grad_norm": 24.876344680786133,
"learning_rate": 9.795630725863285e-07,
"loss": 0.1001,
"step": 2700
},
{
"epoch": 2.9069767441860463,
"grad_norm": 28.51801872253418,
"learning_rate": 6.272022551092319e-07,
"loss": 0.1004,
"step": 2750
},
{
"epoch": 2.9598308668076108,
"grad_norm": 2.746856689453125,
"learning_rate": 2.748414376321353e-07,
"loss": 0.1181,
"step": 2800
},
{
"epoch": 3.0,
"eval_f1_macro": 0.927417514244305,
"eval_f1_micro": 0.9269727168763274,
"eval_loss": 0.2070944607257843,
"eval_precision": 0.9247066492829205,
"eval_recall": 0.9292499181133311,
"eval_runtime": 10.2948,
"eval_samples_per_second": 367.371,
"eval_steps_per_second": 23.021,
"step": 2838
}
],
"logging_steps": 50,
"max_steps": 2838,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.200759923346432e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}