PEFT
Safetensors
mistral
axolotl
Generated from Trainer
4-bit precision
bitsandbytes
ToastyPigeon commited on
Commit
a79f664
·
verified ·
1 Parent(s): 1aedc39

Training in progress, step 950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe1759b3d2196474b0141a45e5c0301b7acd7577de2d7644e9d83466c35af359
3
  size 456206152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1089357c04ec4a0de85e536d52bb4c8df60d290b4d9d5b00a873e9fd046dbbc
3
  size 456206152
last-checkpoint/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57f11133f287c5e1c7ae9bafc3d509f901f222b2daec844eff5e923db9d2f85f
3
  size 912763251
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341005da48ef83ba8e839e0b70ed4e82e9000785e704bde8bfccb97361384f99
3
  size 912763251
last-checkpoint/pytorch_model_fsdp.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7dc69d550f0a0b90b9d608408e8a52c879c7e99e7dfa034d49bba4421ae3c9b
3
  size 456340209
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26402a2eca103da6a9d310b909392899395babe69e239568a171a2b21830103e
3
  size 456340209
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81b739c0c0556fdc3942ec7039cdb0b52555902e1f2e420f6d965e6994495570
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdab9c82a05ed01f13b244c083ffefdc46b875ecbe29601f180ef3e698088da
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1e8a7c325d4cacdc3150876f7ea6ce044d4707d2b0af2681e0076f1407efb3f
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53d0d0d70f1e731a3047262bd6862bc5a552fb1c97f56fe3ab8a8bfb39f818e9
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69fe57146c6b7b3275771cf1c2db2d6495806161adaf6948e7c8319d2b5bbffc
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92949f20b07ea4400476cbbf4d64075409dbdf1f6201cbb60ef6c1f93ae34bd6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.8292682926829267,
6
  "eval_steps": 50,
7
- "global_step": 900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6460,6 +6460,364 @@
6460
  "eval_samples_per_second": 0.257,
6461
  "eval_steps_per_second": 0.134,
6462
  "step": 900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6463
  }
6464
  ],
6465
  "logging_steps": 1,
@@ -6479,7 +6837,7 @@
6479
  "attributes": {}
6480
  }
6481
  },
6482
- "total_flos": 8.712127254458532e+18,
6483
  "train_batch_size": 1,
6484
  "trial_name": null,
6485
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.9308943089430894,
6
  "eval_steps": 50,
7
+ "global_step": 950,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6460
  "eval_samples_per_second": 0.257,
6461
  "eval_steps_per_second": 0.134,
6462
  "step": 900
6463
+ },
6464
+ {
6465
+ "epoch": 1.83130081300813,
6466
+ "grad_norm": 0.3286548852920532,
6467
+ "learning_rate": 9.484105088313405e-08,
6468
+ "loss": 2.4378,
6469
+ "step": 901
6470
+ },
6471
+ {
6472
+ "epoch": 1.8333333333333335,
6473
+ "grad_norm": 0.3614640533924103,
6474
+ "learning_rate": 9.261033555538562e-08,
6475
+ "loss": 2.5291,
6476
+ "step": 902
6477
+ },
6478
+ {
6479
+ "epoch": 1.8353658536585367,
6480
+ "grad_norm": 0.4283101558685303,
6481
+ "learning_rate": 9.040567210362756e-08,
6482
+ "loss": 2.7602,
6483
+ "step": 903
6484
+ },
6485
+ {
6486
+ "epoch": 1.8373983739837398,
6487
+ "grad_norm": 0.4066496789455414,
6488
+ "learning_rate": 8.822708438590871e-08,
6489
+ "loss": 2.4093,
6490
+ "step": 904
6491
+ },
6492
+ {
6493
+ "epoch": 1.839430894308943,
6494
+ "grad_norm": 0.29669034481048584,
6495
+ "learning_rate": 8.607459597809565e-08,
6496
+ "loss": 2.4789,
6497
+ "step": 905
6498
+ },
6499
+ {
6500
+ "epoch": 1.8414634146341462,
6501
+ "grad_norm": 0.38676008582115173,
6502
+ "learning_rate": 8.394823017361747e-08,
6503
+ "loss": 2.7217,
6504
+ "step": 906
6505
+ },
6506
+ {
6507
+ "epoch": 1.8434959349593496,
6508
+ "grad_norm": 0.3684881627559662,
6509
+ "learning_rate": 8.184800998321418e-08,
6510
+ "loss": 2.5145,
6511
+ "step": 907
6512
+ },
6513
+ {
6514
+ "epoch": 1.845528455284553,
6515
+ "grad_norm": 0.37486544251441956,
6516
+ "learning_rate": 7.977395813468792e-08,
6517
+ "loss": 2.3948,
6518
+ "step": 908
6519
+ },
6520
+ {
6521
+ "epoch": 1.8475609756097562,
6522
+ "grad_norm": 0.38173386454582214,
6523
+ "learning_rate": 7.772609707265732e-08,
6524
+ "loss": 2.4007,
6525
+ "step": 909
6526
+ },
6527
+ {
6528
+ "epoch": 1.8495934959349594,
6529
+ "grad_norm": 0.3323315680027008,
6530
+ "learning_rate": 7.57044489583128e-08,
6531
+ "loss": 2.3632,
6532
+ "step": 910
6533
+ },
6534
+ {
6535
+ "epoch": 1.8516260162601625,
6536
+ "grad_norm": 0.34292104840278625,
6537
+ "learning_rate": 7.370903566917915e-08,
6538
+ "loss": 2.6982,
6539
+ "step": 911
6540
+ },
6541
+ {
6542
+ "epoch": 1.8536585365853657,
6543
+ "grad_norm": 0.36134734749794006,
6544
+ "learning_rate": 7.173987879887683e-08,
6545
+ "loss": 2.5694,
6546
+ "step": 912
6547
+ },
6548
+ {
6549
+ "epoch": 1.8556910569105691,
6550
+ "grad_norm": 0.4461964964866638,
6551
+ "learning_rate": 6.97969996568898e-08,
6552
+ "loss": 2.4623,
6553
+ "step": 913
6554
+ },
6555
+ {
6556
+ "epoch": 1.8577235772357723,
6557
+ "grad_norm": 0.36540645360946655,
6558
+ "learning_rate": 6.788041926833382e-08,
6559
+ "loss": 2.5548,
6560
+ "step": 914
6561
+ },
6562
+ {
6563
+ "epoch": 1.8597560975609757,
6564
+ "grad_norm": 0.3682396113872528,
6565
+ "learning_rate": 6.599015837372907e-08,
6566
+ "loss": 2.5853,
6567
+ "step": 915
6568
+ },
6569
+ {
6570
+ "epoch": 1.8617886178861789,
6571
+ "grad_norm": 0.35821810364723206,
6572
+ "learning_rate": 6.412623742877655e-08,
6573
+ "loss": 2.5411,
6574
+ "step": 916
6575
+ },
6576
+ {
6577
+ "epoch": 1.863821138211382,
6578
+ "grad_norm": 0.44045495986938477,
6579
+ "learning_rate": 6.228867660413557e-08,
6580
+ "loss": 2.2603,
6581
+ "step": 917
6582
+ },
6583
+ {
6584
+ "epoch": 1.8658536585365852,
6585
+ "grad_norm": 0.38515955209732056,
6586
+ "learning_rate": 6.04774957852064e-08,
6587
+ "loss": 2.9653,
6588
+ "step": 918
6589
+ },
6590
+ {
6591
+ "epoch": 1.8678861788617886,
6592
+ "grad_norm": 0.36234351992607117,
6593
+ "learning_rate": 5.869271457191433e-08,
6594
+ "loss": 2.4239,
6595
+ "step": 919
6596
+ },
6597
+ {
6598
+ "epoch": 1.8699186991869918,
6599
+ "grad_norm": 0.3159945011138916,
6600
+ "learning_rate": 5.693435227849875e-08,
6601
+ "loss": 2.4183,
6602
+ "step": 920
6603
+ },
6604
+ {
6605
+ "epoch": 1.8719512195121952,
6606
+ "grad_norm": 0.37130528688430786,
6607
+ "learning_rate": 5.520242793330216e-08,
6608
+ "loss": 2.52,
6609
+ "step": 921
6610
+ },
6611
+ {
6612
+ "epoch": 1.8739837398373984,
6613
+ "grad_norm": 0.4329441487789154,
6614
+ "learning_rate": 5.3496960278565935e-08,
6615
+ "loss": 2.4319,
6616
+ "step": 922
6617
+ },
6618
+ {
6619
+ "epoch": 1.8760162601626016,
6620
+ "grad_norm": 0.32947462797164917,
6621
+ "learning_rate": 5.181796777022713e-08,
6622
+ "loss": 2.4703,
6623
+ "step": 923
6624
+ },
6625
+ {
6626
+ "epoch": 1.8780487804878048,
6627
+ "grad_norm": 0.41265442967414856,
6628
+ "learning_rate": 5.0165468577718924e-08,
6629
+ "loss": 2.8564,
6630
+ "step": 924
6631
+ },
6632
+ {
6633
+ "epoch": 1.8800813008130082,
6634
+ "grad_norm": 0.43159809708595276,
6635
+ "learning_rate": 4.853948058377245e-08,
6636
+ "loss": 2.6758,
6637
+ "step": 925
6638
+ },
6639
+ {
6640
+ "epoch": 1.8821138211382114,
6641
+ "grad_norm": 0.3749174475669861,
6642
+ "learning_rate": 4.6940021384226095e-08,
6643
+ "loss": 2.5812,
6644
+ "step": 926
6645
+ },
6646
+ {
6647
+ "epoch": 1.8841463414634148,
6648
+ "grad_norm": 0.2780403792858124,
6649
+ "learning_rate": 4.5367108287832085e-08,
6650
+ "loss": 2.5903,
6651
+ "step": 927
6652
+ },
6653
+ {
6654
+ "epoch": 1.886178861788618,
6655
+ "grad_norm": 0.4100690484046936,
6656
+ "learning_rate": 4.3820758316071854e-08,
6657
+ "loss": 2.4091,
6658
+ "step": 928
6659
+ },
6660
+ {
6661
+ "epoch": 1.8882113821138211,
6662
+ "grad_norm": 0.4257347583770752,
6663
+ "learning_rate": 4.2300988202969296e-08,
6664
+ "loss": 2.4165,
6665
+ "step": 929
6666
+ },
6667
+ {
6668
+ "epoch": 1.8902439024390243,
6669
+ "grad_norm": 0.3895331621170044,
6670
+ "learning_rate": 4.0807814394911996e-08,
6671
+ "loss": 2.2612,
6672
+ "step": 930
6673
+ },
6674
+ {
6675
+ "epoch": 1.8922764227642277,
6676
+ "grad_norm": 0.41140511631965637,
6677
+ "learning_rate": 3.934125305047165e-08,
6678
+ "loss": 2.6891,
6679
+ "step": 931
6680
+ },
6681
+ {
6682
+ "epoch": 1.8943089430894309,
6683
+ "grad_norm": 0.3074701428413391,
6684
+ "learning_rate": 3.790132004022978e-08,
6685
+ "loss": 2.4966,
6686
+ "step": 932
6687
+ },
6688
+ {
6689
+ "epoch": 1.8963414634146343,
6690
+ "grad_norm": 0.3473949432373047,
6691
+ "learning_rate": 3.6488030946606744e-08,
6692
+ "loss": 2.4893,
6693
+ "step": 933
6694
+ },
6695
+ {
6696
+ "epoch": 1.8983739837398375,
6697
+ "grad_norm": 0.38969168066978455,
6698
+ "learning_rate": 3.510140106369103e-08,
6699
+ "loss": 2.561,
6700
+ "step": 934
6701
+ },
6702
+ {
6703
+ "epoch": 1.9004065040650406,
6704
+ "grad_norm": 0.3749343156814575,
6705
+ "learning_rate": 3.37414453970758e-08,
6706
+ "loss": 2.6589,
6707
+ "step": 935
6708
+ },
6709
+ {
6710
+ "epoch": 1.9024390243902438,
6711
+ "grad_norm": 0.33751150965690613,
6712
+ "learning_rate": 3.2408178663696225e-08,
6713
+ "loss": 2.2882,
6714
+ "step": 936
6715
+ },
6716
+ {
6717
+ "epoch": 1.904471544715447,
6718
+ "grad_norm": 0.40897300839424133,
6719
+ "learning_rate": 3.110161529166878e-08,
6720
+ "loss": 2.456,
6721
+ "step": 937
6722
+ },
6723
+ {
6724
+ "epoch": 1.9065040650406504,
6725
+ "grad_norm": 0.3012900948524475,
6726
+ "learning_rate": 2.982176942013665e-08,
6727
+ "loss": 2.626,
6728
+ "step": 938
6729
+ },
6730
+ {
6731
+ "epoch": 1.9085365853658538,
6732
+ "grad_norm": 0.3892320692539215,
6733
+ "learning_rate": 2.8568654899116254e-08,
6734
+ "loss": 2.8018,
6735
+ "step": 939
6736
+ },
6737
+ {
6738
+ "epoch": 1.910569105691057,
6739
+ "grad_norm": 0.349513441324234,
6740
+ "learning_rate": 2.734228528934679e-08,
6741
+ "loss": 2.2679,
6742
+ "step": 940
6743
+ },
6744
+ {
6745
+ "epoch": 1.9126016260162602,
6746
+ "grad_norm": 0.3486090898513794,
6747
+ "learning_rate": 2.614267386214453e-08,
6748
+ "loss": 2.1905,
6749
+ "step": 941
6750
+ },
6751
+ {
6752
+ "epoch": 1.9146341463414633,
6753
+ "grad_norm": 0.3776640295982361,
6754
+ "learning_rate": 2.49698335992582e-08,
6755
+ "loss": 2.4652,
6756
+ "step": 942
6757
+ },
6758
+ {
6759
+ "epoch": 1.9166666666666665,
6760
+ "grad_norm": 0.3477821350097656,
6761
+ "learning_rate": 2.382377719272938e-08,
6762
+ "loss": 2.6303,
6763
+ "step": 943
6764
+ },
6765
+ {
6766
+ "epoch": 1.91869918699187,
6767
+ "grad_norm": 0.4629431366920471,
6768
+ "learning_rate": 2.2704517044754017e-08,
6769
+ "loss": 2.9256,
6770
+ "step": 944
6771
+ },
6772
+ {
6773
+ "epoch": 1.9207317073170733,
6774
+ "grad_norm": 0.3135490119457245,
6775
+ "learning_rate": 2.161206526754972e-08,
6776
+ "loss": 2.454,
6777
+ "step": 945
6778
+ },
6779
+ {
6780
+ "epoch": 1.9227642276422765,
6781
+ "grad_norm": 0.38131558895111084,
6782
+ "learning_rate": 2.05464336832234e-08,
6783
+ "loss": 2.4313,
6784
+ "step": 946
6785
+ },
6786
+ {
6787
+ "epoch": 1.9247967479674797,
6788
+ "grad_norm": 0.28232431411743164,
6789
+ "learning_rate": 1.9507633823643847e-08,
6790
+ "loss": 2.4393,
6791
+ "step": 947
6792
+ },
6793
+ {
6794
+ "epoch": 1.9268292682926829,
6795
+ "grad_norm": 0.44581282138824463,
6796
+ "learning_rate": 1.849567693031684e-08,
6797
+ "loss": 2.3199,
6798
+ "step": 948
6799
+ },
6800
+ {
6801
+ "epoch": 1.928861788617886,
6802
+ "grad_norm": 0.44541609287261963,
6803
+ "learning_rate": 1.7510573954263864e-08,
6804
+ "loss": 2.1009,
6805
+ "step": 949
6806
+ },
6807
+ {
6808
+ "epoch": 1.9308943089430894,
6809
+ "grad_norm": 0.4000997245311737,
6810
+ "learning_rate": 1.65523355559033e-08,
6811
+ "loss": 2.8416,
6812
+ "step": 950
6813
+ },
6814
+ {
6815
+ "epoch": 1.9308943089430894,
6816
+ "eval_loss": 2.523684024810791,
6817
+ "eval_runtime": 89.4612,
6818
+ "eval_samples_per_second": 0.257,
6819
+ "eval_steps_per_second": 0.134,
6820
+ "step": 950
6821
  }
6822
  ],
6823
  "logging_steps": 1,
 
6837
  "attributes": {}
6838
  }
6839
  },
6840
+ "total_flos": 9.195368100613063e+18,
6841
  "train_batch_size": 1,
6842
  "trial_name": null,
6843
  "trial_params": null