End of training

Files changed:
- README.md +1 -1
- all_results.json +6 -6
- train_results.json +6 -6
- trainer_state.json +242 -81
- training_loss.png +0 -0
README.md
CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # persuasion_simulation_tulu3_8b_sft_sft_w_promp_10epochs
 
-This model is a fine-tuned version of [allenai/Llama-3.1-Tulu-3-8B-SFT](https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-SFT) on
+This model is a fine-tuned version of [allenai/Llama-3.1-Tulu-3-8B-SFT](https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-SFT) on the persuasion_simulation dataset.
 
 ## Model description
 
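For reference, a minimal inference sketch for the finished checkpoint. The repository id below is a hypothetical placeholder (only the model name, not the namespace, appears in the README), and it assumes `transformers`, `torch`, and `accelerate` (for `device_map="auto"`) are installed and that the tokenizer ships the chat template inherited from the Tulu-3 base model.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical repo id: replace "your-org" with the actual namespace.
repo_id = "your-org/persuasion_simulation_tulu3_8b_sft_sft_w_promp_10epochs"

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [{"role": "user", "content": "Convince me to back up my files tonight."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output_ids = model.generate(input_ids, max_new_tokens=256)
print(tokenizer.decode(output_ids[0, input_ids.shape[-1]:], skip_special_tokens=True))
```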
all_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch":
-    "total_flos":
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second": 1.
-    "train_steps_per_second": 0.
+    "epoch": 9.8,
+    "total_flos": 13673089597440.0,
+    "train_loss": 0.34487654119729994,
+    "train_runtime": 4380.6043,
+    "train_samples_per_second": 1.687,
+    "train_steps_per_second": 0.105
 }
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch":
-    "total_flos":
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second": 1.
-    "train_steps_per_second": 0.
+    "epoch": 9.8,
+    "total_flos": 13673089597440.0,
+    "train_loss": 0.34487654119729994,
+    "train_runtime": 4380.6043,
+    "train_samples_per_second": 1.687,
+    "train_steps_per_second": 0.105
 }
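The two summaries above are identical, and the throughput figures are mutually consistent with the step count recorded in trainer_state.json below. A quick arithmetic check in plain Python, with the values copied from these diffs:

```python
# Values copied from all_results.json / train_results.json and trainer_state.json.
train_runtime = 4380.6043   # seconds
global_step = 460           # optimizer steps over the whole run

print(round(global_step / train_runtime, 3))        # 0.105 -> matches "train_steps_per_second"

train_samples_per_second = 1.687
print(round(train_samples_per_second * train_runtime))  # ~7390 examples seen across all epochs combined
```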
trainer_state.json
CHANGED
@@ -2,188 +2,349 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 9.8,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 460,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.21621621621621623,
-      "grad_norm": 7.
-      "learning_rate":
-      "loss": 2.
+      "grad_norm": 7.928413532238152,
+      "learning_rate": 2.173913043478261e-06,
+      "loss": 2.1422,
       "step": 10
     },
     {
       "epoch": 0.43243243243243246,
-      "grad_norm":
-      "learning_rate":
-      "loss": 1.
+      "grad_norm": 6.01372163805319,
+      "learning_rate": 4.347826086956522e-06,
+      "loss": 1.3261,
       "step": 20
     },
     {
       "epoch": 0.6486486486486487,
-      "grad_norm":
-      "learning_rate":
-      "loss": 1.
+      "grad_norm": 2.143165243967343,
+      "learning_rate": 6.521739130434783e-06,
+      "loss": 1.0565,
       "step": 30
     },
     {
       "epoch": 0.8648648648648649,
-      "grad_norm":
-      "learning_rate":
-      "loss":
+      "grad_norm": 2.002981301105067,
+      "learning_rate": 8.695652173913044e-06,
+      "loss": 1.0008,
       "step": 40
     },
     {
       "epoch": 1.0648648648648649,
-      "grad_norm":
-      "learning_rate": 9.
-      "loss": 0.
+      "grad_norm": 2.003768096340962,
+      "learning_rate": 9.997696831512027e-06,
+      "loss": 0.9263,
       "step": 50
     },
     {
       "epoch": 1.281081081081081,
-      "grad_norm": 1.
-      "learning_rate": 9.
-      "loss": 0.
+      "grad_norm": 1.9628397812309482,
+      "learning_rate": 9.971810547786794e-06,
+      "loss": 0.832,
       "step": 60
     },
     {
       "epoch": 1.4972972972972973,
-      "grad_norm": 1.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 1.9670767045650799,
+      "learning_rate": 9.917308508168712e-06,
+      "loss": 0.8092,
       "step": 70
     },
     {
       "epoch": 1.7135135135135136,
-      "grad_norm": 1.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 1.8258318637551825,
+      "learning_rate": 9.834504404631032e-06,
+      "loss": 0.8275,
       "step": 80
     },
     {
       "epoch": 1.9297297297297298,
-      "grad_norm": 1.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 1.853270348883972,
+      "learning_rate": 9.72387482452377e-06,
+      "loss": 0.8257,
       "step": 90
     },
     {
       "epoch": 2.1297297297297297,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.181312155139493,
+      "learning_rate": 9.586056507527266e-06,
+      "loss": 0.7141,
       "step": 100
     },
     {
       "epoch": 2.345945945945946,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.2635996413273576,
+      "learning_rate": 9.421842680832862e-06,
+      "loss": 0.5572,
       "step": 110
     },
     {
       "epoch": 2.562162162162162,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.0267513780569555,
+      "learning_rate": 9.232178493644006e-06,
+      "loss": 0.5536,
       "step": 120
     },
     {
       "epoch": 2.7783783783783784,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.2717141213154233,
+      "learning_rate": 9.018155577274891e-06,
+      "loss": 0.5567,
       "step": 130
     },
     {
       "epoch": 2.9945945945945946,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.161425393067172,
+      "learning_rate": 8.781005762156593e-06,
+      "loss": 0.554,
       "step": 140
     },
     {
       "epoch": 3.1945945945945944,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.6379003441929583,
+      "learning_rate": 8.522093987913063e-06,
+      "loss": 0.3631,
       "step": 150
     },
     {
       "epoch": 3.410810810810811,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.248635452478229,
+      "learning_rate": 8.24291044731378e-06,
+      "loss": 0.302,
       "step": 160
     },
     {
       "epoch": 3.627027027027027,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.3569737350184297,
+      "learning_rate": 7.94506200931932e-06,
+      "loss": 0.2989,
       "step": 170
     },
     {
       "epoch": 3.8432432432432435,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.622041501021548,
+      "learning_rate": 7.630262970585355e-06,
+      "loss": 0.3074,
       "step": 180
     },
     {
       "epoch": 4.043243243243243,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.274362887049295,
+      "learning_rate": 7.300325188655762e-06,
+      "loss": 0.2728,
       "step": 190
     },
     {
       "epoch": 4.2594594594594595,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.1104698967330995,
+      "learning_rate": 6.957147653634198e-06,
+      "loss": 0.1462,
       "step": 200
     },
     {
       "epoch": 4.475675675675676,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.1899462721461993,
+      "learning_rate": 6.6027055583554865e-06,
+      "loss": 0.1454,
       "step": 210
     },
     {
       "epoch": 4.691891891891892,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.6349171406091316,
+      "learning_rate": 6.2390389299645e-06,
+      "loss": 0.1545,
       "step": 220
     },
     {
       "epoch": 4.908108108108108,
-      "grad_norm": 2.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.1896943214948026,
+      "learning_rate": 5.8682408883346535e-06,
+      "loss": 0.147,
       "step": 230
     },
     {
-      "epoch":
-      "
-      "
-      "
-      "
-
-
+      "epoch": 5.108108108108108,
+      "grad_norm": 1.8580244939014094,
+      "learning_rate": 5.492445598905843e-06,
+      "loss": 0.1175,
+      "step": 240
+    },
+    {
+      "epoch": 5.324324324324325,
+      "grad_norm": 1.7205360918150199,
+      "learning_rate": 5.113815989280528e-06,
+      "loss": 0.0828,
+      "step": 250
+    },
+    {
+      "epoch": 5.54054054054054,
+      "grad_norm": 2.041469867596687,
+      "learning_rate": 4.7345313002762545e-06,
+      "loss": 0.0814,
+      "step": 260
+    },
+    {
+      "epoch": 5.756756756756757,
+      "grad_norm": 1.731451961015919,
+      "learning_rate": 4.356774543085845e-06,
+      "loss": 0.083,
+      "step": 270
+    },
+    {
+      "epoch": 5.972972972972973,
+      "grad_norm": 1.640206077124201,
+      "learning_rate": 3.982719934736832e-06,
+      "loss": 0.0863,
+      "step": 280
+    },
+    {
+      "epoch": 6.172972972972973,
+      "grad_norm": 1.6295826391354875,
+      "learning_rate": 3.6145203841665577e-06,
+      "loss": 0.0574,
+      "step": 290
+    },
+    {
+      "epoch": 6.389189189189189,
+      "grad_norm": 1.440211343971061,
+      "learning_rate": 3.2542951009381584e-06,
+      "loss": 0.0536,
+      "step": 300
+    },
+    {
+      "epoch": 6.605405405405405,
+      "grad_norm": 1.4014358014924952,
+      "learning_rate": 2.9041173979166813e-06,
+      "loss": 0.0543,
+      "step": 310
+    },
+    {
+      "epoch": 6.821621621621622,
+      "grad_norm": 1.3463804839310973,
+      "learning_rate": 2.566002758108256e-06,
+      "loss": 0.0541,
+      "step": 320
+    },
+    {
+      "epoch": 7.021621621621621,
+      "grad_norm": 2.579253801667801,
+      "learning_rate": 2.241897234344864e-06,
+      "loss": 0.0507,
+      "step": 330
+    },
+    {
+      "epoch": 7.237837837837838,
+      "grad_norm": 1.1381118685241185,
+      "learning_rate": 1.933666248581418e-06,
+      "loss": 0.0337,
+      "step": 340
+    },
+    {
+      "epoch": 7.454054054054054,
+      "grad_norm": 1.1614358836368845,
+      "learning_rate": 1.6430838552720168e-06,
+      "loss": 0.0347,
+      "step": 350
+    },
+    {
+      "epoch": 7.6702702702702705,
+      "grad_norm": 0.9439702681790234,
+      "learning_rate": 1.3718225306210049e-06,
+      "loss": 0.0346,
+      "step": 360
+    },
+    {
+      "epoch": 7.886486486486486,
+      "grad_norm": 0.9350344061498097,
+      "learning_rate": 1.1214435464779006e-06,
+      "loss": 0.0345,
+      "step": 370
+    },
+    {
+      "epoch": 8.086486486486486,
+      "grad_norm": 0.6158081847960301,
+      "learning_rate": 8.933879842801558e-07,
+      "loss": 0.0292,
+      "step": 380
+    },
+    {
+      "epoch": 8.302702702702703,
+      "grad_norm": 0.8470578217546869,
+      "learning_rate": 6.889684407639324e-07,
+      "loss": 0.0219,
+      "step": 390
+    },
+    {
+      "epoch": 8.518918918918919,
+      "grad_norm": 0.7060993947541574,
+      "learning_rate": 5.0936147318152e-07,
+      "loss": 0.022,
+      "step": 400
+    },
+    {
+      "epoch": 8.735135135135135,
+      "grad_norm": 0.5972472978430013,
+      "learning_rate": 3.55600827507665e-07,
+      "loss": 0.0216,
+      "step": 410
+    },
+    {
+      "epoch": 8.951351351351352,
+      "grad_norm": 0.6814447950625864,
+      "learning_rate": 2.2857148861060552e-07,
+      "loss": 0.0217,
+      "step": 420
+    },
+    {
+      "epoch": 9.151351351351352,
+      "grad_norm": 0.45326924305283917,
+      "learning_rate": 1.2900458663260506e-07,
+      "loss": 0.0186,
+      "step": 430
+    },
+    {
+      "epoch": 9.367567567567567,
+      "grad_norm": 0.3976668377272393,
+      "learning_rate": 5.747318889684883e-08,
+      "loss": 0.0173,
+      "step": 440
+    },
+    {
+      "epoch": 9.583783783783783,
+      "grad_norm": 0.47000235405688096,
+      "learning_rate": 1.4389001560803917e-08,
+      "loss": 0.0176,
+      "step": 450
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.4716229816777135,
+      "learning_rate": 0.0,
+      "loss": 0.0169,
+      "step": 460
+    },
+    {
+      "epoch": 9.8,
+      "step": 460,
+      "total_flos": 13673089597440.0,
+      "train_loss": 0.34487654119729994,
+      "train_runtime": 4380.6043,
+      "train_samples_per_second": 1.687,
+      "train_steps_per_second": 0.105
     }
   ],
   "logging_steps": 10,
-  "max_steps":
+  "max_steps": 460,
   "num_input_tokens_seen": 0,
-  "num_train_epochs":
+  "num_train_epochs": 10,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -197,7 +358,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 13673089597440.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
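The logged learning rates are consistent with a linear warmup to a 1e-5 peak over the first 10% of the 460 steps, followed by cosine decay to zero. This is an inference from the numbers rather than a configuration stated in the commit; a small check under that assumption:

```python
import math

# Assumed schedule (inferred, not stated in the commit): peak LR 1e-5,
# linear warmup over the first 46 of 460 steps, cosine decay to zero afterwards.
PEAK_LR, MAX_STEPS, WARMUP_STEPS = 1e-5, 460, 46

def lr_at(step: int) -> float:
    if step < WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS
    progress = (step - WARMUP_STEPS) / (MAX_STEPS - WARMUP_STEPS)
    return PEAK_LR * 0.5 * (1.0 + math.cos(math.pi * progress))

print(lr_at(10))    # ~2.174e-06 (logged: 2.173913043478261e-06)
print(lr_at(230))   # ~5.868e-06 (logged: 5.8682408883346535e-06)
print(lr_at(460))   # 0.0        (logged: 0.0)
```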
training_loss.png
CHANGED
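training_loss.png was regenerated alongside the JSON files. How the original figure was produced is not part of this commit, but a curve of the same shape can be rebuilt from the log above; a sketch assuming `matplotlib` is installed and `trainer_state.json` sits in the working directory:

```python
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep the periodic logging records; the final summary entry reports
# "train_loss" instead of "loss" and is skipped by this filter.
points = [(rec["step"], rec["loss"]) for rec in state["log_history"] if "loss" in rec]
steps, losses = zip(*points)

plt.plot(steps, losses, marker="o")
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("persuasion_simulation_tulu3_8b_sft_sft_w_promp_10epochs")
plt.tight_layout()
plt.savefig("training_loss.png", dpi=150)
```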