Upload train_log.txt with huggingface_hub
Browse files- train_log.txt +67 -0
train_log.txt
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NEW RUN 2025-11-25-09-26-52
|
| 2 |
+
{}
|
| 3 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 1, 'train_micro_batch_size_per_gpu': 4, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
| 4 |
+
NEW RUN 2025-11-25-09-37-25
|
| 5 |
+
{}
|
| 6 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 1, 'train_micro_batch_size_per_gpu': 4, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
| 7 |
+
NEW RUN 2025-11-25-09-44-43
|
| 8 |
+
{}
|
| 9 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 1, 'train_micro_batch_size_per_gpu': 4, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
| 10 |
+
NEW RUN 2025-11-25-09-50-38
|
| 11 |
+
{}
|
| 12 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 1, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 1, 'train_micro_batch_size_per_gpu': 4, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
| 13 |
+
0 0.025781 1.0261 0.00099964 2025-11-25 10:09:27.093266 0
|
| 14 |
+
1 0.023579 1.0239 0.00099850 2025-11-25 10:26:37.813699 1
|
| 15 |
+
2 0.022702 1.0230 0.00099657 2025-11-25 10:43:49.534783 2
|
| 16 |
+
3 0.022195 1.0224 0.00099387 2025-11-25 11:01:01.615835 3
|
| 17 |
+
4 0.021879 1.0221 0.00099039 2025-11-25 11:18:19.783584 4
|
| 18 |
+
5 0.021609 1.0218 0.00098613 2025-11-25 11:35:32.636801 5
|
| 19 |
+
6 0.021345 1.0216 0.00098110 2025-11-25 11:52:47.151118 6
|
| 20 |
+
7 0.021211 1.0214 0.00097530 2025-11-25 12:09:59.923614 7
|
| 21 |
+
8 0.021089 1.0213 0.00096874 2025-11-25 12:27:14.358480 8
|
| 22 |
+
9 0.020925 1.0211 0.00096142 2025-11-25 12:44:35.897918 9
|
| 23 |
+
10 0.020750 1.0210 0.00095334 2025-11-25 13:01:50.870234 10
|
| 24 |
+
11 0.020567 1.0208 0.00094452 2025-11-25 13:19:06.175694 11
|
| 25 |
+
12 0.020457 1.0207 0.00093497 2025-11-25 13:36:21.617379 12
|
| 26 |
+
13 0.020393 1.0206 0.00092468 2025-11-25 13:53:36.589036 13
|
| 27 |
+
14 0.020265 1.0205 0.00091366 2025-11-25 14:10:56.901775 14
|
| 28 |
+
15 0.020128 1.0203 0.00090194 2025-11-25 14:28:11.332730 15
|
| 29 |
+
16 0.020012 1.0202 0.00088950 2025-11-25 14:45:26.016004 16
|
| 30 |
+
17 0.019984 1.0202 0.00087638 2025-11-25 15:02:40.630977 17
|
| 31 |
+
18 0.019842 1.0200 0.00086256 2025-11-25 15:19:57.472631 18
|
| 32 |
+
19 0.019797 1.0200 0.00084807 2025-11-25 15:37:17.611795 19
|
| 33 |
+
20 0.019613 1.0198 0.00083292 2025-11-25 15:54:31.849578 20
|
| 34 |
+
21 0.019541 1.0197 0.00081712 2025-11-25 16:11:47.375506 21
|
| 35 |
+
22 0.019487 1.0197 0.00080068 2025-11-25 16:29:01.942201 22
|
| 36 |
+
23 0.019414 1.0196 0.00078361 2025-11-25 16:46:16.456218 23
|
| 37 |
+
24 0.019260 1.0194 0.00076594 2025-11-25 17:03:35.688394 24
|
| 38 |
+
25 0.019148 1.0193 0.00074766 2025-11-25 17:20:48.369221 25
|
| 39 |
+
26 0.019129 1.0193 0.00072880 2025-11-25 17:38:04.277852 26
|
| 40 |
+
27 0.019049 1.0192 0.00070937 2025-11-25 17:55:18.093633 27
|
| 41 |
+
28 0.018899 1.0191 0.00068939 2025-11-25 18:12:32.176259 28
|
| 42 |
+
29 0.018918 1.0191 0.00066888 2025-11-25 18:29:52.621785 29
|
| 43 |
+
30 0.018736 1.0189 0.00064784 2025-11-25 18:47:06.025710 30
|
| 44 |
+
31 0.018678 1.0189 0.00062629 2025-11-25 19:04:21.670322 31
|
| 45 |
+
32 0.018547 1.0187 0.00060426 2025-11-25 19:21:35.695933 32
|
| 46 |
+
33 0.018510 1.0187 0.00058176 2025-11-25 19:38:50.075519 33
|
| 47 |
+
34 0.018407 1.0186 0.00055881 2025-11-25 19:56:10.413095 34
|
| 48 |
+
35 0.018220 1.0184 0.00053542 2025-11-25 20:13:23.182800 35
|
| 49 |
+
36 0.018256 1.0184 0.00051162 2025-11-25 20:30:37.638736 36
|
| 50 |
+
37 0.018149 1.0183 0.00048742 2025-11-25 20:47:52.047674 37
|
| 51 |
+
38 0.017952 1.0181 0.00046284 2025-11-25 21:05:06.143286 38
|
| 52 |
+
39 0.017898 1.0181 0.00043791 2025-11-25 21:22:26.997221 39
|
| 53 |
+
40 0.017826 1.0180 0.00041263 2025-11-25 21:39:40.958022 40
|
| 54 |
+
41 0.017640 1.0178 0.00038704 2025-11-25 21:56:56.103381 41
|
| 55 |
+
42 0.017676 1.0178 0.00036115 2025-11-25 22:14:10.104491 42
|
| 56 |
+
43 0.017451 1.0176 0.00033498 2025-11-25 22:31:24.668177 43
|
| 57 |
+
44 0.017332 1.0175 0.00030856 2025-11-25 22:48:46.791123 44
|
| 58 |
+
45 0.017387 1.0175 0.00028189 2025-11-25 23:06:01.130256 45
|
| 59 |
+
46 0.017320 1.0175 0.00025502 2025-11-25 23:23:15.997539 46
|
| 60 |
+
47 0.017143 1.0173 0.00022795 2025-11-25 23:40:30.847304 47
|
| 61 |
+
48 0.017033 1.0172 0.00020070 2025-11-25 23:57:46.657894 48
|
| 62 |
+
49 0.016944 1.0171 0.00017331 2025-11-26 00:15:07.375128 49
|
| 63 |
+
50 0.016878 1.0170 0.00014579 2025-11-26 00:32:21.100876 50
|
| 64 |
+
51 0.016759 1.0169 0.00011816 2025-11-26 00:49:36.114103 51
|
| 65 |
+
52 0.016643 1.0168 0.00009044 2025-11-26 01:06:49.338177 52
|
| 66 |
+
53 0.016621 1.0168 0.00006266 2025-11-26 01:24:04.934045 53
|
| 67 |
+
54 0.016519 1.0167 0.00003484 2025-11-26 01:41:25.488925 54
|