thomas-schweich commited on
Commit
528867e
·
1 Parent(s): 960e0bc

Training run: 100K steps (squash-merge from run/run_20260325_110202)

Browse files
checkpoints/step_00100000/.complete ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format_version": 1,
3
+ "files": {
4
+ "config.json": "f19d120aebbdf210c61f38dcb1244157891593821c86f479b0d7590603476722",
5
+ "model.safetensors": "0d1aaa304162c2a60718cd89486fad9e318ac4025c8fe2e4f1a4fd276ca9c4a8",
6
+ "optimizer.safetensors": "ad554734f8a565356b0b3f79531e8dab4f5fbcc3d16d31b6c24ee2db3bab4e40",
7
+ "training_state.json": "7a8ffc064099b66e20a5616174ddae60c6d69ca6ef9951e448abf0ecb079601e"
8
+ }
9
+ }
checkpoints/step_00100000/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format_version": 1,
3
+ "checkpoint_type": "pretrain",
4
+ "model_config": {
5
+ "vocab_size": 4278,
6
+ "max_seq_len": 256,
7
+ "n_outcomes": 5,
8
+ "d_model": 640,
9
+ "n_layers": 10,
10
+ "n_heads": 8,
11
+ "d_ff": 2560,
12
+ "dropout": 0.0,
13
+ "rope_base": 10000.0
14
+ },
15
+ "training_config": {
16
+ "lr": 0.0003,
17
+ "weight_decay": 0.01,
18
+ "max_grad_norm": 1.0,
19
+ "warmup_steps": 1000,
20
+ "total_steps": 100000,
21
+ "batch_size": 256,
22
+ "max_ply": 256,
23
+ "discard_ply_limit": false,
24
+ "num_workers": 4,
25
+ "use_amp": true,
26
+ "accumulation_steps": 1,
27
+ "log_interval": 10,
28
+ "eval_interval": 500,
29
+ "checkpoint_interval": 5000,
30
+ "base_seed": 42,
31
+ "val_seed": 9223372036854775807,
32
+ "val_games": 512,
33
+ "checkpoint_dir": "/tmp/logs/run_20260325_110202/checkpoints",
34
+ "log_dir": "/tmp/logs",
35
+ "use_wandb": false,
36
+ "wandb_project": "pawn",
37
+ "device": "cuda"
38
+ }
39
+ }
checkpoints/step_00100000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d1aaa304162c2a60718cd89486fad9e318ac4025c8fe2e4f1a4fd276ca9c4a8
3
+ size 273514592
checkpoints/step_00100000/optimizer.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad554734f8a565356b0b3f79531e8dab4f5fbcc3d16d31b6c24ee2db3bab4e40
3
+ size 547036244
checkpoints/step_00100000/training_state.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format_version": 1,
3
+ "global_step": 100000,
4
+ "scheduler_state_dict": {
5
+ "step": 100000
6
+ },
7
+ "scaler_state_dict": {
8
+ "scale": 2097152.0,
9
+ "growth_factor": 2.0,
10
+ "backoff_factor": 0.5,
11
+ "growth_interval": 2000,
12
+ "_growth_tracker": 9
13
+ },
14
+ "optimizer_meta": {
15
+ "param_groups": [
16
+ {
17
+ "lr": 2.9999999999999997e-05,
18
+ "betas": [
19
+ 0.9,
20
+ 0.999
21
+ ],
22
+ "eps": 1e-08,
23
+ "weight_decay": 0.01,
24
+ "amsgrad": false,
25
+ "maximize": false,
26
+ "foreach": null,
27
+ "capturable": false,
28
+ "differentiable": false,
29
+ "fused": null,
30
+ "decoupled_weight_decay": true,
31
+ "params": [
32
+ 0,
33
+ 1,
34
+ 2,
35
+ 3,
36
+ 4,
37
+ 5,
38
+ 6,
39
+ 7,
40
+ 8,
41
+ 9,
42
+ 10,
43
+ 11,
44
+ 12,
45
+ 13,
46
+ 14,
47
+ 15,
48
+ 16,
49
+ 17,
50
+ 18,
51
+ 19,
52
+ 20,
53
+ 21,
54
+ 22,
55
+ 23,
56
+ 24,
57
+ 25,
58
+ 26,
59
+ 27,
60
+ 28,
61
+ 29,
62
+ 30,
63
+ 31,
64
+ 32,
65
+ 33,
66
+ 34,
67
+ 35,
68
+ 36,
69
+ 37,
70
+ 38,
71
+ 39,
72
+ 40,
73
+ 41,
74
+ 42,
75
+ 43,
76
+ 44,
77
+ 45,
78
+ 46,
79
+ 47,
80
+ 48,
81
+ 49,
82
+ 50,
83
+ 51,
84
+ 52,
85
+ 53,
86
+ 54,
87
+ 55,
88
+ 56,
89
+ 57,
90
+ 58,
91
+ 59,
92
+ 60,
93
+ 61,
94
+ 62,
95
+ 63,
96
+ 64,
97
+ 65,
98
+ 66,
99
+ 67,
100
+ 68,
101
+ 69,
102
+ 70,
103
+ 71,
104
+ 72,
105
+ 73,
106
+ 74,
107
+ 75,
108
+ 76,
109
+ 77,
110
+ 78,
111
+ 79,
112
+ 80,
113
+ 81,
114
+ 82,
115
+ 83,
116
+ 84,
117
+ 85,
118
+ 86,
119
+ 87,
120
+ 88,
121
+ 89,
122
+ 90,
123
+ 91,
124
+ 92,
125
+ 93,
126
+ 94,
127
+ 95,
128
+ 96
129
+ ]
130
+ }
131
+ ],
132
+ "scalars": null
133
+ },
134
+ "torch_rng_state": "h08rNwf38rOtAAAAAQAAAMQBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
135
+ "cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
136
+ }
metrics.jsonl CHANGED
The diff for this file is too large to render. See raw diff