Training in progress, step 950, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 456206152
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1089357c04ec4a0de85e536d52bb4c8df60d290b4d9d5b00a873e9fd046dbbc
|
| 3 |
size 456206152
|
last-checkpoint/optimizer.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 912763251
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:341005da48ef83ba8e839e0b70ed4e82e9000785e704bde8bfccb97361384f99
|
| 3 |
size 912763251
|
last-checkpoint/pytorch_model_fsdp.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 456340209
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26402a2eca103da6a9d310b909392899395babe69e239568a171a2b21830103e
|
| 3 |
size 456340209
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cdab9c82a05ed01f13b244c083ffefdc46b875ecbe29601f180ef3e698088da
|
| 3 |
size 14917
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53d0d0d70f1e731a3047262bd6862bc5a552fb1c97f56fe3ab8a8bfb39f818e9
|
| 3 |
size 14917
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92949f20b07ea4400476cbbf4d64075409dbdf1f6201cbb60ef6c1f93ae34bd6
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6460,6 +6460,364 @@
|
|
| 6460 |
"eval_samples_per_second": 0.257,
|
| 6461 |
"eval_steps_per_second": 0.134,
|
| 6462 |
"step": 900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6463 |
}
|
| 6464 |
],
|
| 6465 |
"logging_steps": 1,
|
|
@@ -6479,7 +6837,7 @@
|
|
| 6479 |
"attributes": {}
|
| 6480 |
}
|
| 6481 |
},
|
| 6482 |
-
"total_flos":
|
| 6483 |
"train_batch_size": 1,
|
| 6484 |
"trial_name": null,
|
| 6485 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.9308943089430894,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 950,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6460 |
"eval_samples_per_second": 0.257,
|
| 6461 |
"eval_steps_per_second": 0.134,
|
| 6462 |
"step": 900
|
| 6463 |
+
},
|
| 6464 |
+
{
|
| 6465 |
+
"epoch": 1.83130081300813,
|
| 6466 |
+
"grad_norm": 0.3286548852920532,
|
| 6467 |
+
"learning_rate": 9.484105088313405e-08,
|
| 6468 |
+
"loss": 2.4378,
|
| 6469 |
+
"step": 901
|
| 6470 |
+
},
|
| 6471 |
+
{
|
| 6472 |
+
"epoch": 1.8333333333333335,
|
| 6473 |
+
"grad_norm": 0.3614640533924103,
|
| 6474 |
+
"learning_rate": 9.261033555538562e-08,
|
| 6475 |
+
"loss": 2.5291,
|
| 6476 |
+
"step": 902
|
| 6477 |
+
},
|
| 6478 |
+
{
|
| 6479 |
+
"epoch": 1.8353658536585367,
|
| 6480 |
+
"grad_norm": 0.4283101558685303,
|
| 6481 |
+
"learning_rate": 9.040567210362756e-08,
|
| 6482 |
+
"loss": 2.7602,
|
| 6483 |
+
"step": 903
|
| 6484 |
+
},
|
| 6485 |
+
{
|
| 6486 |
+
"epoch": 1.8373983739837398,
|
| 6487 |
+
"grad_norm": 0.4066496789455414,
|
| 6488 |
+
"learning_rate": 8.822708438590871e-08,
|
| 6489 |
+
"loss": 2.4093,
|
| 6490 |
+
"step": 904
|
| 6491 |
+
},
|
| 6492 |
+
{
|
| 6493 |
+
"epoch": 1.839430894308943,
|
| 6494 |
+
"grad_norm": 0.29669034481048584,
|
| 6495 |
+
"learning_rate": 8.607459597809565e-08,
|
| 6496 |
+
"loss": 2.4789,
|
| 6497 |
+
"step": 905
|
| 6498 |
+
},
|
| 6499 |
+
{
|
| 6500 |
+
"epoch": 1.8414634146341462,
|
| 6501 |
+
"grad_norm": 0.38676008582115173,
|
| 6502 |
+
"learning_rate": 8.394823017361747e-08,
|
| 6503 |
+
"loss": 2.7217,
|
| 6504 |
+
"step": 906
|
| 6505 |
+
},
|
| 6506 |
+
{
|
| 6507 |
+
"epoch": 1.8434959349593496,
|
| 6508 |
+
"grad_norm": 0.3684881627559662,
|
| 6509 |
+
"learning_rate": 8.184800998321418e-08,
|
| 6510 |
+
"loss": 2.5145,
|
| 6511 |
+
"step": 907
|
| 6512 |
+
},
|
| 6513 |
+
{
|
| 6514 |
+
"epoch": 1.845528455284553,
|
| 6515 |
+
"grad_norm": 0.37486544251441956,
|
| 6516 |
+
"learning_rate": 7.977395813468792e-08,
|
| 6517 |
+
"loss": 2.3948,
|
| 6518 |
+
"step": 908
|
| 6519 |
+
},
|
| 6520 |
+
{
|
| 6521 |
+
"epoch": 1.8475609756097562,
|
| 6522 |
+
"grad_norm": 0.38173386454582214,
|
| 6523 |
+
"learning_rate": 7.772609707265732e-08,
|
| 6524 |
+
"loss": 2.4007,
|
| 6525 |
+
"step": 909
|
| 6526 |
+
},
|
| 6527 |
+
{
|
| 6528 |
+
"epoch": 1.8495934959349594,
|
| 6529 |
+
"grad_norm": 0.3323315680027008,
|
| 6530 |
+
"learning_rate": 7.57044489583128e-08,
|
| 6531 |
+
"loss": 2.3632,
|
| 6532 |
+
"step": 910
|
| 6533 |
+
},
|
| 6534 |
+
{
|
| 6535 |
+
"epoch": 1.8516260162601625,
|
| 6536 |
+
"grad_norm": 0.34292104840278625,
|
| 6537 |
+
"learning_rate": 7.370903566917915e-08,
|
| 6538 |
+
"loss": 2.6982,
|
| 6539 |
+
"step": 911
|
| 6540 |
+
},
|
| 6541 |
+
{
|
| 6542 |
+
"epoch": 1.8536585365853657,
|
| 6543 |
+
"grad_norm": 0.36134734749794006,
|
| 6544 |
+
"learning_rate": 7.173987879887683e-08,
|
| 6545 |
+
"loss": 2.5694,
|
| 6546 |
+
"step": 912
|
| 6547 |
+
},
|
| 6548 |
+
{
|
| 6549 |
+
"epoch": 1.8556910569105691,
|
| 6550 |
+
"grad_norm": 0.4461964964866638,
|
| 6551 |
+
"learning_rate": 6.97969996568898e-08,
|
| 6552 |
+
"loss": 2.4623,
|
| 6553 |
+
"step": 913
|
| 6554 |
+
},
|
| 6555 |
+
{
|
| 6556 |
+
"epoch": 1.8577235772357723,
|
| 6557 |
+
"grad_norm": 0.36540645360946655,
|
| 6558 |
+
"learning_rate": 6.788041926833382e-08,
|
| 6559 |
+
"loss": 2.5548,
|
| 6560 |
+
"step": 914
|
| 6561 |
+
},
|
| 6562 |
+
{
|
| 6563 |
+
"epoch": 1.8597560975609757,
|
| 6564 |
+
"grad_norm": 0.3682396113872528,
|
| 6565 |
+
"learning_rate": 6.599015837372907e-08,
|
| 6566 |
+
"loss": 2.5853,
|
| 6567 |
+
"step": 915
|
| 6568 |
+
},
|
| 6569 |
+
{
|
| 6570 |
+
"epoch": 1.8617886178861789,
|
| 6571 |
+
"grad_norm": 0.35821810364723206,
|
| 6572 |
+
"learning_rate": 6.412623742877655e-08,
|
| 6573 |
+
"loss": 2.5411,
|
| 6574 |
+
"step": 916
|
| 6575 |
+
},
|
| 6576 |
+
{
|
| 6577 |
+
"epoch": 1.863821138211382,
|
| 6578 |
+
"grad_norm": 0.44045495986938477,
|
| 6579 |
+
"learning_rate": 6.228867660413557e-08,
|
| 6580 |
+
"loss": 2.2603,
|
| 6581 |
+
"step": 917
|
| 6582 |
+
},
|
| 6583 |
+
{
|
| 6584 |
+
"epoch": 1.8658536585365852,
|
| 6585 |
+
"grad_norm": 0.38515955209732056,
|
| 6586 |
+
"learning_rate": 6.04774957852064e-08,
|
| 6587 |
+
"loss": 2.9653,
|
| 6588 |
+
"step": 918
|
| 6589 |
+
},
|
| 6590 |
+
{
|
| 6591 |
+
"epoch": 1.8678861788617886,
|
| 6592 |
+
"grad_norm": 0.36234351992607117,
|
| 6593 |
+
"learning_rate": 5.869271457191433e-08,
|
| 6594 |
+
"loss": 2.4239,
|
| 6595 |
+
"step": 919
|
| 6596 |
+
},
|
| 6597 |
+
{
|
| 6598 |
+
"epoch": 1.8699186991869918,
|
| 6599 |
+
"grad_norm": 0.3159945011138916,
|
| 6600 |
+
"learning_rate": 5.693435227849875e-08,
|
| 6601 |
+
"loss": 2.4183,
|
| 6602 |
+
"step": 920
|
| 6603 |
+
},
|
| 6604 |
+
{
|
| 6605 |
+
"epoch": 1.8719512195121952,
|
| 6606 |
+
"grad_norm": 0.37130528688430786,
|
| 6607 |
+
"learning_rate": 5.520242793330216e-08,
|
| 6608 |
+
"loss": 2.52,
|
| 6609 |
+
"step": 921
|
| 6610 |
+
},
|
| 6611 |
+
{
|
| 6612 |
+
"epoch": 1.8739837398373984,
|
| 6613 |
+
"grad_norm": 0.4329441487789154,
|
| 6614 |
+
"learning_rate": 5.3496960278565935e-08,
|
| 6615 |
+
"loss": 2.4319,
|
| 6616 |
+
"step": 922
|
| 6617 |
+
},
|
| 6618 |
+
{
|
| 6619 |
+
"epoch": 1.8760162601626016,
|
| 6620 |
+
"grad_norm": 0.32947462797164917,
|
| 6621 |
+
"learning_rate": 5.181796777022713e-08,
|
| 6622 |
+
"loss": 2.4703,
|
| 6623 |
+
"step": 923
|
| 6624 |
+
},
|
| 6625 |
+
{
|
| 6626 |
+
"epoch": 1.8780487804878048,
|
| 6627 |
+
"grad_norm": 0.41265442967414856,
|
| 6628 |
+
"learning_rate": 5.0165468577718924e-08,
|
| 6629 |
+
"loss": 2.8564,
|
| 6630 |
+
"step": 924
|
| 6631 |
+
},
|
| 6632 |
+
{
|
| 6633 |
+
"epoch": 1.8800813008130082,
|
| 6634 |
+
"grad_norm": 0.43159809708595276,
|
| 6635 |
+
"learning_rate": 4.853948058377245e-08,
|
| 6636 |
+
"loss": 2.6758,
|
| 6637 |
+
"step": 925
|
| 6638 |
+
},
|
| 6639 |
+
{
|
| 6640 |
+
"epoch": 1.8821138211382114,
|
| 6641 |
+
"grad_norm": 0.3749174475669861,
|
| 6642 |
+
"learning_rate": 4.6940021384226095e-08,
|
| 6643 |
+
"loss": 2.5812,
|
| 6644 |
+
"step": 926
|
| 6645 |
+
},
|
| 6646 |
+
{
|
| 6647 |
+
"epoch": 1.8841463414634148,
|
| 6648 |
+
"grad_norm": 0.2780403792858124,
|
| 6649 |
+
"learning_rate": 4.5367108287832085e-08,
|
| 6650 |
+
"loss": 2.5903,
|
| 6651 |
+
"step": 927
|
| 6652 |
+
},
|
| 6653 |
+
{
|
| 6654 |
+
"epoch": 1.886178861788618,
|
| 6655 |
+
"grad_norm": 0.4100690484046936,
|
| 6656 |
+
"learning_rate": 4.3820758316071854e-08,
|
| 6657 |
+
"loss": 2.4091,
|
| 6658 |
+
"step": 928
|
| 6659 |
+
},
|
| 6660 |
+
{
|
| 6661 |
+
"epoch": 1.8882113821138211,
|
| 6662 |
+
"grad_norm": 0.4257347583770752,
|
| 6663 |
+
"learning_rate": 4.2300988202969296e-08,
|
| 6664 |
+
"loss": 2.4165,
|
| 6665 |
+
"step": 929
|
| 6666 |
+
},
|
| 6667 |
+
{
|
| 6668 |
+
"epoch": 1.8902439024390243,
|
| 6669 |
+
"grad_norm": 0.3895331621170044,
|
| 6670 |
+
"learning_rate": 4.0807814394911996e-08,
|
| 6671 |
+
"loss": 2.2612,
|
| 6672 |
+
"step": 930
|
| 6673 |
+
},
|
| 6674 |
+
{
|
| 6675 |
+
"epoch": 1.8922764227642277,
|
| 6676 |
+
"grad_norm": 0.41140511631965637,
|
| 6677 |
+
"learning_rate": 3.934125305047165e-08,
|
| 6678 |
+
"loss": 2.6891,
|
| 6679 |
+
"step": 931
|
| 6680 |
+
},
|
| 6681 |
+
{
|
| 6682 |
+
"epoch": 1.8943089430894309,
|
| 6683 |
+
"grad_norm": 0.3074701428413391,
|
| 6684 |
+
"learning_rate": 3.790132004022978e-08,
|
| 6685 |
+
"loss": 2.4966,
|
| 6686 |
+
"step": 932
|
| 6687 |
+
},
|
| 6688 |
+
{
|
| 6689 |
+
"epoch": 1.8963414634146343,
|
| 6690 |
+
"grad_norm": 0.3473949432373047,
|
| 6691 |
+
"learning_rate": 3.6488030946606744e-08,
|
| 6692 |
+
"loss": 2.4893,
|
| 6693 |
+
"step": 933
|
| 6694 |
+
},
|
| 6695 |
+
{
|
| 6696 |
+
"epoch": 1.8983739837398375,
|
| 6697 |
+
"grad_norm": 0.38969168066978455,
|
| 6698 |
+
"learning_rate": 3.510140106369103e-08,
|
| 6699 |
+
"loss": 2.561,
|
| 6700 |
+
"step": 934
|
| 6701 |
+
},
|
| 6702 |
+
{
|
| 6703 |
+
"epoch": 1.9004065040650406,
|
| 6704 |
+
"grad_norm": 0.3749343156814575,
|
| 6705 |
+
"learning_rate": 3.37414453970758e-08,
|
| 6706 |
+
"loss": 2.6589,
|
| 6707 |
+
"step": 935
|
| 6708 |
+
},
|
| 6709 |
+
{
|
| 6710 |
+
"epoch": 1.9024390243902438,
|
| 6711 |
+
"grad_norm": 0.33751150965690613,
|
| 6712 |
+
"learning_rate": 3.2408178663696225e-08,
|
| 6713 |
+
"loss": 2.2882,
|
| 6714 |
+
"step": 936
|
| 6715 |
+
},
|
| 6716 |
+
{
|
| 6717 |
+
"epoch": 1.904471544715447,
|
| 6718 |
+
"grad_norm": 0.40897300839424133,
|
| 6719 |
+
"learning_rate": 3.110161529166878e-08,
|
| 6720 |
+
"loss": 2.456,
|
| 6721 |
+
"step": 937
|
| 6722 |
+
},
|
| 6723 |
+
{
|
| 6724 |
+
"epoch": 1.9065040650406504,
|
| 6725 |
+
"grad_norm": 0.3012900948524475,
|
| 6726 |
+
"learning_rate": 2.982176942013665e-08,
|
| 6727 |
+
"loss": 2.626,
|
| 6728 |
+
"step": 938
|
| 6729 |
+
},
|
| 6730 |
+
{
|
| 6731 |
+
"epoch": 1.9085365853658538,
|
| 6732 |
+
"grad_norm": 0.3892320692539215,
|
| 6733 |
+
"learning_rate": 2.8568654899116254e-08,
|
| 6734 |
+
"loss": 2.8018,
|
| 6735 |
+
"step": 939
|
| 6736 |
+
},
|
| 6737 |
+
{
|
| 6738 |
+
"epoch": 1.910569105691057,
|
| 6739 |
+
"grad_norm": 0.349513441324234,
|
| 6740 |
+
"learning_rate": 2.734228528934679e-08,
|
| 6741 |
+
"loss": 2.2679,
|
| 6742 |
+
"step": 940
|
| 6743 |
+
},
|
| 6744 |
+
{
|
| 6745 |
+
"epoch": 1.9126016260162602,
|
| 6746 |
+
"grad_norm": 0.3486090898513794,
|
| 6747 |
+
"learning_rate": 2.614267386214453e-08,
|
| 6748 |
+
"loss": 2.1905,
|
| 6749 |
+
"step": 941
|
| 6750 |
+
},
|
| 6751 |
+
{
|
| 6752 |
+
"epoch": 1.9146341463414633,
|
| 6753 |
+
"grad_norm": 0.3776640295982361,
|
| 6754 |
+
"learning_rate": 2.49698335992582e-08,
|
| 6755 |
+
"loss": 2.4652,
|
| 6756 |
+
"step": 942
|
| 6757 |
+
},
|
| 6758 |
+
{
|
| 6759 |
+
"epoch": 1.9166666666666665,
|
| 6760 |
+
"grad_norm": 0.3477821350097656,
|
| 6761 |
+
"learning_rate": 2.382377719272938e-08,
|
| 6762 |
+
"loss": 2.6303,
|
| 6763 |
+
"step": 943
|
| 6764 |
+
},
|
| 6765 |
+
{
|
| 6766 |
+
"epoch": 1.91869918699187,
|
| 6767 |
+
"grad_norm": 0.4629431366920471,
|
| 6768 |
+
"learning_rate": 2.2704517044754017e-08,
|
| 6769 |
+
"loss": 2.9256,
|
| 6770 |
+
"step": 944
|
| 6771 |
+
},
|
| 6772 |
+
{
|
| 6773 |
+
"epoch": 1.9207317073170733,
|
| 6774 |
+
"grad_norm": 0.3135490119457245,
|
| 6775 |
+
"learning_rate": 2.161206526754972e-08,
|
| 6776 |
+
"loss": 2.454,
|
| 6777 |
+
"step": 945
|
| 6778 |
+
},
|
| 6779 |
+
{
|
| 6780 |
+
"epoch": 1.9227642276422765,
|
| 6781 |
+
"grad_norm": 0.38131558895111084,
|
| 6782 |
+
"learning_rate": 2.05464336832234e-08,
|
| 6783 |
+
"loss": 2.4313,
|
| 6784 |
+
"step": 946
|
| 6785 |
+
},
|
| 6786 |
+
{
|
| 6787 |
+
"epoch": 1.9247967479674797,
|
| 6788 |
+
"grad_norm": 0.28232431411743164,
|
| 6789 |
+
"learning_rate": 1.9507633823643847e-08,
|
| 6790 |
+
"loss": 2.4393,
|
| 6791 |
+
"step": 947
|
| 6792 |
+
},
|
| 6793 |
+
{
|
| 6794 |
+
"epoch": 1.9268292682926829,
|
| 6795 |
+
"grad_norm": 0.44581282138824463,
|
| 6796 |
+
"learning_rate": 1.849567693031684e-08,
|
| 6797 |
+
"loss": 2.3199,
|
| 6798 |
+
"step": 948
|
| 6799 |
+
},
|
| 6800 |
+
{
|
| 6801 |
+
"epoch": 1.928861788617886,
|
| 6802 |
+
"grad_norm": 0.44541609287261963,
|
| 6803 |
+
"learning_rate": 1.7510573954263864e-08,
|
| 6804 |
+
"loss": 2.1009,
|
| 6805 |
+
"step": 949
|
| 6806 |
+
},
|
| 6807 |
+
{
|
| 6808 |
+
"epoch": 1.9308943089430894,
|
| 6809 |
+
"grad_norm": 0.4000997245311737,
|
| 6810 |
+
"learning_rate": 1.65523355559033e-08,
|
| 6811 |
+
"loss": 2.8416,
|
| 6812 |
+
"step": 950
|
| 6813 |
+
},
|
| 6814 |
+
{
|
| 6815 |
+
"epoch": 1.9308943089430894,
|
| 6816 |
+
"eval_loss": 2.523684024810791,
|
| 6817 |
+
"eval_runtime": 89.4612,
|
| 6818 |
+
"eval_samples_per_second": 0.257,
|
| 6819 |
+
"eval_steps_per_second": 0.134,
|
| 6820 |
+
"step": 950
|
| 6821 |
}
|
| 6822 |
],
|
| 6823 |
"logging_steps": 1,
|
|
|
|
| 6837 |
"attributes": {}
|
| 6838 |
}
|
| 6839 |
},
|
| 6840 |
+
"total_flos": 9.195368100613063e+18,
|
| 6841 |
"train_batch_size": 1,
|
| 6842 |
"trial_name": null,
|
| 6843 |
"trial_params": null
|