v1.0.0: searchless_chess vocab, 512 context, 200K steps
Squash-merge of run/co_pretraining_2026_04_13 into main.
Replaces the legacy backbone (4,278-token coordinate vocab, 256 context,
prepend_outcome=True, 100K steps) with the v1.0.0 backbone:
- 1,980-token searchless_chess action vocabulary (1,968 reachable actions
+ 1 PAD + 11 outcome tokens)
- 512-token context window
- prepend_outcome=False (no outcome conditioning)
- 200K training steps at batch size 256 (best checkpoint at step 195K)
- 66,909,440 parameters
The published model.safetensors is the best 5K-cadence checkpoint by
validation loss (step 195,000). All intermediate 5K checkpoints are
preserved under checkpoints/ for training-dynamics analysis.
The legacy checkpoint is preserved in the git history of this repo and
is also available at thomas-schweich/pawn-large-legacy.
- checkpoints/step_00005000/.complete +9 -0
- checkpoints/step_00005000/config.json +43 -0
- checkpoints/step_00005000/model.safetensors +3 -0
- checkpoints/step_00005000/optimizer.safetensors +3 -0
- checkpoints/step_00005000/training_state.json +136 -0
- checkpoints/step_00010000/.complete +9 -0
- checkpoints/step_00010000/config.json +43 -0
- checkpoints/step_00010000/model.safetensors +3 -0
- checkpoints/step_00010000/optimizer.safetensors +3 -0
- checkpoints/step_00010000/training_state.json +136 -0
- checkpoints/step_00015000/.complete +9 -0
- checkpoints/step_00015000/config.json +43 -0
- checkpoints/step_00015000/model.safetensors +3 -0
- checkpoints/step_00015000/optimizer.safetensors +3 -0
- checkpoints/step_00015000/training_state.json +136 -0
- checkpoints/step_00020000/.complete +9 -0
- checkpoints/step_00020000/config.json +43 -0
- checkpoints/step_00020000/model.safetensors +3 -0
- checkpoints/step_00020000/optimizer.safetensors +3 -0
- checkpoints/step_00020000/training_state.json +136 -0
- checkpoints/step_00025000/.complete +9 -0
- checkpoints/step_00025000/config.json +43 -0
- checkpoints/step_00025000/model.safetensors +3 -0
- checkpoints/step_00025000/optimizer.safetensors +3 -0
- checkpoints/step_00025000/training_state.json +136 -0
- checkpoints/step_00030000/.complete +9 -0
- checkpoints/step_00030000/config.json +43 -0
- checkpoints/step_00030000/model.safetensors +3 -0
- checkpoints/step_00030000/optimizer.safetensors +3 -0
- checkpoints/step_00030000/training_state.json +136 -0
- checkpoints/step_00035000/.complete +9 -0
- checkpoints/step_00035000/config.json +43 -0
- checkpoints/step_00035000/model.safetensors +3 -0
- checkpoints/step_00035000/optimizer.safetensors +3 -0
- checkpoints/step_00035000/training_state.json +136 -0
- checkpoints/step_00040000/.complete +9 -0
- checkpoints/step_00040000/config.json +43 -0
- checkpoints/step_00040000/model.safetensors +3 -0
- checkpoints/step_00040000/optimizer.safetensors +3 -0
- checkpoints/step_00040000/training_state.json +136 -0
- checkpoints/step_00045000/.complete +9 -0
- checkpoints/step_00045000/config.json +43 -0
- checkpoints/step_00045000/model.safetensors +3 -0
- checkpoints/step_00045000/optimizer.safetensors +3 -0
- checkpoints/step_00045000/training_state.json +136 -0
- checkpoints/step_00050000/.complete +9 -0
- checkpoints/step_00050000/config.json +43 -0
- checkpoints/step_00050000/model.safetensors +3 -0
- checkpoints/step_00050000/optimizer.safetensors +3 -0
- checkpoints/step_00050000/training_state.json +136 -0
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "f7e703c5b8290ae6a025d7a9cc71c8ecc01eb6918196b56731d53194e0e5c1bd",
|
| 6 |
+
"optimizer.safetensors": "17d33cd206addb45ee211303a071cfabff65cbeb4f09c61cf923db23dcfe2aae",
|
| 7 |
+
"training_state.json": "ebc549eab6cb91cba30b1ff63e521a82853a6970e23998941ec76c6c9665f7b6"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7e703c5b8290ae6a025d7a9cc71c8ecc01eb6918196b56731d53194e0e5c1bd
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17d33cd206addb45ee211303a071cfabff65cbeb4f09c61cf923db23dcfe2aae
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 5000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 5000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 262144.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1000
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00015,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "71aaf5bfc69250dd5c50ea8318af150db30842aced0f07d4ee775b2849c91f9a",
|
| 6 |
+
"optimizer.safetensors": "dbda21bffe58e106ff12e1e4cf442aa05dd5a5af1d6d96577aebac870e2fb4ab",
|
| 7 |
+
"training_state.json": "9cf4676f3afad0598d6120a045c5064ecc9a60213157f405f1cdf04a338c9697"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71aaf5bfc69250dd5c50ea8318af150db30842aced0f07d4ee775b2849c91f9a
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbda21bffe58e106ff12e1e4cf442aa05dd5a5af1d6d96577aebac870e2fb4ab
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 10000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 10000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 524288.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1714
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.0003,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "1bc0d8a21b44974e33e31b0978104d64abd6ad7ab8e2bdcd4a5c3c682da6605f",
|
| 6 |
+
"optimizer.safetensors": "e59b230d4c2377698d3ae0e1c6df668cf20a84778a4ddb0d3dc591d08e730c68",
|
| 7 |
+
"training_state.json": "e4091c358c40aaa0b80ce0f7b7c26b3b29efd8f8be16bab5846aeac434aac82d"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bc0d8a21b44974e33e31b0978104d64abd6ad7ab8e2bdcd4a5c3c682da6605f
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e59b230d4c2377698d3ae0e1c6df668cf20a84778a4ddb0d3dc591d08e730c68
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 15000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 15000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 1048576.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1900
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.0002995389065559004,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "a5d4c28aed778533ddc2a3817d4e133c5b9719264656c8e028061b91a0858834",
|
| 6 |
+
"optimizer.safetensors": "f63a12e61c1586c672809818966ff36dbde645a62c9fca8151daff4d8fd23552",
|
| 7 |
+
"training_state.json": "605186ff2013de247939d4d3c5ea8b74676d6abe033c117d3067720c819043f7"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5d4c28aed778533ddc2a3817d4e133c5b9719264656c8e028061b91a0858834
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f63a12e61c1586c672809818966ff36dbde645a62c9fca8151daff4d8fd23552
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 20000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 20000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 1048576.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1356
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.0002981587759593675,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "7f819b5e9d9aa0c023b62626fb28b07234ba2935210738edb34dc2c6276d8431",
|
| 6 |
+
"optimizer.safetensors": "649fd55cca182a48228fba570c4afcd7eed1aad56feb54409a1beef26c7478a9",
|
| 7 |
+
"training_state.json": "495c1fc2315e765c490c83411c96181ec7c512c0117b698c6bfa2cdf24ed1764"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f819b5e9d9aa0c023b62626fb28b07234ba2935210738edb34dc2c6276d8431
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:649fd55cca182a48228fba570c4afcd7eed1aad56feb54409a1beef26c7478a9
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 25000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 25000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 524288.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 262
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00029586903590180956,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "2e684610b123f7a20177667c6298268bad2368c40ce789b6c09caaef5842e4c8",
|
| 6 |
+
"optimizer.safetensors": "16ee8c854db156690329f8ccdc10bb5c6b990d686dcd51933f346bf879480a5c",
|
| 7 |
+
"training_state.json": "199f945a5486745483900620dd8e639fd8f1e77cadbf2be10eb63cbb1014b216"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e684610b123f7a20177667c6298268bad2368c40ce789b6c09caaef5842e4c8
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16ee8c854db156690329f8ccdc10bb5c6b990d686dcd51933f346bf879480a5c
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 30000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 30000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 262144.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 549
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.0002926853276295856,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "2ff3a513e80be0373d161987dc1f993fd68a37bbdc76338f99e213201d9c643f",
|
| 6 |
+
"optimizer.safetensors": "41f18b27c56dd4334b4f1c98690d96cf5f7da3b9d60646c17d3445dfcaee6d2a",
|
| 7 |
+
"training_state.json": "aac6e3ee58c523d07b07dbdb07b18dbf766165c7c0078248c0e3eeff098762f2"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ff3a513e80be0373d161987dc1f993fd68a37bbdc76338f99e213201d9c643f
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41f18b27c56dd4334b4f1c98690d96cf5f7da3b9d60646c17d3445dfcaee6d2a
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 35000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 35000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 262144.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1607
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00028862939909843273,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "a182a5b9f0999ac162e82f40dab948dba774cb84980ee05f1ad609465fa40b13",
|
| 6 |
+
"optimizer.safetensors": "10cf2a3760984caf0f4d3c0af2ae02dafb7e3599041fad025f9f3591d2baeea2",
|
| 7 |
+
"training_state.json": "daf57e6a48c02fb8e01cf9a3a7c9af449cdd538cee5539be4fabd8fd26b43f33"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a182a5b9f0999ac162e82f40dab948dba774cb84980ee05f1ad609465fa40b13
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10cf2a3760984caf0f4d3c0af2ae02dafb7e3599041fad025f9f3591d2baeea2
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 40000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 40000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 262144.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 703
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.000283728956412876,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "0e638a29f6abeab8a508807f1b9b4db18ebd3f20ca5cd5cb8d83e2b68c3f397b",
|
| 6 |
+
"optimizer.safetensors": "79d7289edeb05486b08cf054de07b9d3385f01b1a9586e5b5d19cbc59e29e37d",
|
| 7 |
+
"training_state.json": "261273621fdc4c7d5ad4271c95d451c7a14369a1df482c954355f253418c83eb"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e638a29f6abeab8a508807f1b9b4db18ebd3f20ca5cd5cb8d83e2b68c3f397b
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79d7289edeb05486b08cf054de07b9d3385f01b1a9586e5b5d19cbc59e29e37d
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 45000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 45000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 262144.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 592
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00027801747456544134,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "47558a8af42aad173abcc41b3986c57111d276c5c47ac80abaa0bf9e2145ad16",
|
| 5 |
+
"model.safetensors": "84cbad0735609f5cedb7ef607964286b847521a4a2acd081d55b7168a1ecd4ea",
|
| 6 |
+
"optimizer.safetensors": "cbf148ecca49b558c116b610c5f239d9184261ade5b632ac3c287e76e59230fe",
|
| 7 |
+
"training_state.json": "7255ace50f9072d0d3b4146671298278e32d849b9d4c2d31ad4d8d467d3cd9dd"
|
| 8 |
+
}
|
| 9 |
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 1980,
|
| 6 |
+
"max_seq_len": 512,
|
| 7 |
+
"n_outcomes": 11,
|
| 8 |
+
"d_model": 640,
|
| 9 |
+
"n_layers": 10,
|
| 10 |
+
"n_heads": 8,
|
| 11 |
+
"d_ff": 2560,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 10000,
|
| 20 |
+
"total_steps": 200000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 512,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 50,
|
| 28 |
+
"eval_interval": 1000,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"pause_after_steps": null,
|
| 31 |
+
"no_outcome_token": false,
|
| 32 |
+
"prepend_outcome": false,
|
| 33 |
+
"mate_boost": 0.0,
|
| 34 |
+
"base_seed": 42,
|
| 35 |
+
"val_seed": 9223372036854775807,
|
| 36 |
+
"val_games": 2048,
|
| 37 |
+
"checkpoint_dir": "checkpoints",
|
| 38 |
+
"log_dir": "/workspace/logs",
|
| 39 |
+
"use_wandb": false,
|
| 40 |
+
"wandb_project": "pawn",
|
| 41 |
+
"device": "cuda"
|
| 42 |
+
}
|
| 43 |
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84cbad0735609f5cedb7ef607964286b847521a4a2acd081d55b7168a1ecd4ea
|
| 3 |
+
size 267647080
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbf148ecca49b558c116b610c5f239d9184261ade5b632ac3c287e76e59230fe
|
| 3 |
+
size 535301204
|
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 50000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 50000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 131072.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1948
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.0002715339687685131,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78,
|
| 111 |
+
79,
|
| 112 |
+
80,
|
| 113 |
+
81,
|
| 114 |
+
82,
|
| 115 |
+
83,
|
| 116 |
+
84,
|
| 117 |
+
85,
|
| 118 |
+
86,
|
| 119 |
+
87,
|
| 120 |
+
88,
|
| 121 |
+
89,
|
| 122 |
+
90,
|
| 123 |
+
91,
|
| 124 |
+
92,
|
| 125 |
+
93,
|
| 126 |
+
94,
|
| 127 |
+
95,
|
| 128 |
+
96
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"scalars": null
|
| 133 |
+
},
|
| 134 |
+
"torch_rng_state": "hljcGxUfSQ+fAQAAAQAAANIAAAAAAAAAzj3uUQAAAAAlDbfbAAAAAODXdhYAAAAAHAdTtgAAAABPp7tMAAAAAASjDTUAAAAA5xmHeQAAAADgrPl2AAAAAGDTO+4AAAAAEV5vpQAAAAAoLmVHAAAAAOM3XOwAAAAArcqH+QAAAADiw0BbAAAAAGj5lM8AAAAAcw+GYgAAAACrLjMSAAAAADGQlT4AAAAAaZpDJQAAAACBzZ+gAAAAAPc+rTkAAAAAFSQBQwAAAADp6uHMAAAAAHNXWQYAAAAA2lY4AwAAAAAXrHbYAAAAAKP1wi0AAAAA4zBXUwAAAAASoagbAAAAAAMltJoAAAAAyJh3RwAAAACMKoqOAAAAAEFk+8UAAAAABdjhIAAAAACeoY6ZAAAAAN6XSAgAAAAA4I5MvgAAAADLcLbFAAAAAJBiDvMAAAAABtjFygAAAAD/E32LAAAAAEgRBGMAAAAA7SItpwAAAAA8l1BPAAAAAIp05iwAAAAAcqnPuQAAAABhWFc4AAAAAN3gcLsAAAAAqRf8NAAAAADfX3chAAAAADop2cYAAAAAXkRDAAAAAAAE9144AAAAAIeDKI4AAAAAy6DHcAAAAABuF9jQAAAAABddICQAAAAAdqW3OgAAAACWziEwAAAAAGmr+FAAAAAAj1XhIAAAAAC/foK4AAAAAAH5SLYAAAAAI6MKfAAAAACLG+U3AAAAADn/ly0AAAAANgH0tAAAAABN+QB2AAAAAAly8WIAAAAA5AHsOQAAAAAhSzuMAAAAAAflo4YAAAAAcREqzgAAAADCEfxMAAAAAH0l6y4AAAAAcGr/nQAAAAAqDd21AAAAAJYzfLEAAAAAI3IK4wAAAAB1YRabAAAAAOhNxmwAAAAA6N/jNwAAAACIiTZQAAAAAJXTfhgAAAAAK1dacAAAAAA3A/CSAAAAAMAY52MAAAAASmYhwAAAAAAhywp5AAAAAJLXrvIAAAAABIg7zAAAAAC/Pf4IAAAAAAt1nVQAAAAAo9VJOAAAAADwcwdYAAAAAEbzUaYAAAAA6XoZHQAAAACTcjjdAAAAANXrCksAAAAA0oqT3gAAAACzxja2AAAAAJ+cHWQAAAAAMTfUOAAAAAAF5ZPAAAAAADzzboQAAAAAhOYUwwAAAAAuufxEAAAAANB6LDwAAAAAw2ouuwAAAABUF4EMAAAAAGKc/FMAAAAAxoYYWwAAAACideOdAAAAAOxJAuQAAAAAiaBQZwAAAAAbUJMKAAAAADOqykYAAAAAFw4BNgAAAABlBOehAAAAAArQmRIAAAAAnrPfGQAAAACN1jNwAAAAAGyEccoAAAAAp7rovAAAAAAGI0Z3AAAAALb8PJwAAAAALIImuQAAAACQJRU8AAAAAG5cKlkAAAAAIG6fBgAAAAACC273AAAAAP7o+5gAAAAAToXBRwAAAADFqYJ8AAAAAFT/jrsAAAAAYrhP0wAAAABN0ONuAAAAACzQYcAAAAAALM4z2gAAAADLKdG9AAAAAFbJbG4AAAAAi2dK3QAAAADKrnC1AAAAAGsTRpAAAAAANSwMvwAAAABStYiWAAAAADUfVekAAAAAbPDIYgAAAACtadvrAAAAAJbEir8AAAAAcRp4fAAAAADiXAigAAAAAOhvPmUAAAAA56FHTwAAAAD404Q8AAAAAG/Gb1AAAAAABzBngAAAAAB53GsnAAAAACNJByYAAAAAJWk6mAAAAAAR0/H8AAAAABk8q/IAAAAATyzT3QAAAABjLsHtAAAAAHg0v5IAAAAA6OFrMgAAAADajHYNAAAAAG4RtjQAAAAAZ+TORAAAAAAnp1qMAAAAACHwtoMAAAAA+ds8hAAAAABKzPteAAAAAN4eczUAAAAAkvDP0wAAAACqFw2IAAAAAHoBh4cAAAAAePOLRAAAAABT8DC/AAAAADVTwpEAAAAAUYiatQAAAACXZrLGAAAAAN+HV94A
AAAAwxkwXQAAAAABQ1UXAAAAAM8BXBAAAAAARVbEhgAAAABnz0LDAAAAAHTQvKQAAAAA4HY1MQAAAABUAbX5AAAAAJgWAP8AAAAABeXZKAAAAABenJweAAAAAEG+UhoAAAAA5NC0pAAAAACDD2m9AAAAAH08/fgAAAAAQAV27AAAAABy04BGAAAAAO1Isa8AAAAAQwhJCwAAAAAlhx20AAAAAFtpMRUAAAAAOho9HQAAAAAiLbXnAAAAACkQ0dEAAAAAZ4mA4gAAAACylZP9AAAAAObE36gAAAAA0oWi3QAAAABLb+MIAAAAAOxj0fIAAAAAltiwmAAAAAAH3U8bAAAAAGcUCo8AAAAApMjanwAAAADJ4qqNAAAAAGpkiMgAAAAAXCXW+AAAAAD6wxo9AAAAALAyHzEAAAAAl44w/QAAAADHBcyvAAAAAGjeZS4AAAAAlvI2NgAAAABQsOQVAAAAAMn5VB8AAAAA270MUQAAAACwmptEAAAAAI5Il2QAAAAASMbr4gAAAAAnA1HMAAAAAPZJyZwAAAAAM/a23QAAAACZ2LlLAAAAAJm6ic4AAAAALfCgigAAAACcnwxSAAAAAK/X8lsAAAAA3eS8AwAAAABGDHhzAAAAAPXy1CQAAAAAD3+vSwAAAADwF1l7AAAAACjQk50AAAAAZA3zGAAAAACUnvzAAAAAAAdBjNMAAAAATfPJPwAAAABLkJCfAAAAAKtnKQsAAAAA3FdwiAAAAACMII9UAAAAAJOB1P0AAAAAhOeLtQAAAAALcTVxAAAAAA0auq0AAAAAu8nHLwAAAABWNSAVAAAAAPduvpIAAAAAKCzZGQAAAACKQpFkAAAAAP+T+IoAAAAAJqrv3QAAAADyaf/GAAAAADSEtpwAAAAA/nbj+QAAAAA/p4CMAAAAAHMd2ZkAAAAA5OCN/gAAAADLZhkKAAAAAHH9PcQAAAAA7As7sQAAAADp584RAAAAAOFghuoAAAAAYSpZeAAAAABqOyxAAAAAAJb5AZkAAAAAHNhbewAAAACruWdUAAAAAIEE2RAAAAAASCZM9AAAAABH2+4+AAAAAHqg1x4AAAAAmwaCDgAAAABZiDpDAAAAALpp8WkAAAAArSNt2AAAAACAI3NMAAAAAK5sIVkAAAAAvrPq9AAAAACn/ZbJAAAAAE1dcIUAAAAAHJptewAAAAAMpNVdAAAAAJD6r4sAAAAAQ8zgSwAAAAAdMeh2AAAAAOH1IFMAAAAA5s/tgQAAAAA5Hmn4AAAAAD0klXcAAAAACWVZYQAAAADv79iWAAAAAPNqwSgAAAAAykW6vAAAAABd1BdYAAAAAISlGyAAAAAABPGqvwAAAAAzMKygAAAAAP7mIPYAAAAANoxv1gAAAAAQO/EvAAAAALzevV8AAAAAMWauuQAAAAB9lS1nAAAAABLXSnAAAAAACY8rLAAAAABu6imZAAAAAApPPxkAAAAAsZI4fwAAAAC0VCfNAAAAAPMGkaUAAAAAo7i9twAAAABgyShMAAAAAO4ZTm0AAAAAXnq4KQAAAADL712mAAAAAKV48z4AAAAAbD77PgAAAABDgyi1AAAAAF1zypgAAAAADED1NQAAAACdQrKsAAAAAMDyv0sAAAAANJNmVAAAAACQJTCWAAAAAKFhpd8AAAAA5dn+rAAAAADZJnjLAAAAAPsJ4DEAAAAA24AiwwAAAAAavNyuAAAAAJ/90BEAAAAAYcYuYwAAAADqjbAXAAAAAFGRlQYAAAAA3BiyxgAAAAAbacbRAAAAAIct0hIAAAAAdxmaPAAAAAAWsk0aAAAAALkGV08AAAAAPlprVAAAAADgvPghAAAAAMDUJL0AAAAAGkuLWwAAAACBczInAAAAAOvMn9AAAAAArLBMOgAAAABSTKSJAAAAAMhvCI4AAAAAe9BgpQAAAAAmEXUwAAAAAIJdol0AAAAAWbmklAAAAACZLiX1AAAAADCnUEsAAAAAdn4b5QAAAABd
MEDSAAAAAHiL1TsAAAAAqTnYKQAAAADNLeXxAAAAACoiIRwAAAAAKjX6cQAAAAC50do0AAAAAPf3e8cAAAAAsgk1bgAAAABkW3TRAAAAADRaJYIAAAAAWZdhCQAAAADXiFFwAAAAAPr229wAAAAAMaIXQQAAAABRdq3UAAAAAA754sIAAAAAM0dpbQAAAADzVNFWAAAAABP2M6gAAAAANQUAZwAAAAB/6mUzAAAAABz6VtIAAAAAR82aQgAAAACFyWWoAAAAAEXSQJEAAAAAq1MIyQAAAADmdtDCAAAAAIyfAlMAAAAA1S2biAAAAAD5ht4UAAAAAJ4ITdcAAAAACGZDpAAAAADcT1FtAAAAAFjAPw4AAAAA50Y9RwAAAABxnhZhAAAAAKFjeD0AAAAApIfw+wAAAAB9EQ/HAAAAAGE6DkQAAAAAEX9+IgAAAABGWglfAAAAAGn+J6sAAAAAHlC6gQAAAACJkBE6AAAAAMfj7cMAAAAA+SBbVgAAAABqj5WSAAAAANrwPY4AAAAAg+X9iQAAAACqRLSaAAAAAMmtyHkAAAAAVSX1DQAAAACUE/7bAAAAACvdcOMAAAAAR5cTIQAAAABG6d1bAAAAAH0P85IAAAAAM9OBXgAAAADgEVo3AAAAAL4EtmkAAAAAFu0k3QAAAADUPQN2AAAAAJxxxi0AAAAATgOS1gAAAAAF3krOAAAAAJroewYAAAAAgZ3e0QAAAADyjQwWAAAAAMUurd8AAAAAW2NEZgAAAAAWS3pdAAAAABeXFoIAAAAAfGiTfgAAAABk3nWsAAAAAKhIgLkAAAAAWPFQMgAAAADom33CAAAAACPH41gAAAAAyQp9TgAAAACIP2hdAAAAALAhF/YAAAAATDvpbAAAAAA2FR7bAAAAAPYJcpIAAAAAin3jXgAAAABYltyNAAAAAIpg+ncAAAAA7Va1tQAAAABh4LXsAAAAACrXx+oAAAAA5cCekAAAAACTshg1AAAAAEYaxXAAAAAAA7zFiQAAAADroW2SAAAAAPNdl9wAAAAA5dsX4QAAAAD513YBAAAAAK/w938AAAAATsHv3wAAAADA5xK6AAAAAFxaulgAAAAA8uuuLwAAAAAhyC1jAAAAAB4/5iUAAAAAd860oAAAAAD6dizeAAAAAGbtswYAAAAAgQoojwAAAAAGbUCOAAAAAMs648AAAAAAkfdm2AAAAACNfYnRAAAAAN9oH8cAAAAAvSC+/wAAAADF3yn1AAAAAA1vZPAAAAAAqcLtmAAAAAC7rpqZAAAAAMXEZ4sAAAAARNjWgAAAAABdWyY5AAAAAMEt0jQAAAAAC19z8gAAAAAbqZQPAAAAAK+o9qsAAAAAswuOBgAAAABNAW9GAAAAADt8dF8AAAAAZmMbqgAAAAAPNnCuAAAAAA6DqC0AAAAAhGAFYgAAAAD4nHoyAAAAAIkR+C0AAAAAMLAerQAAAADByP18AAAAAAkH8qEAAAAAHWPWZwAAAAAxMPGHAAAAAELsVw4AAAAAWmd5KwAAAAArsq8/AAAAAJFH0WkAAAAAYTyXywAAAAAjpkmcAAAAAHHaJlQAAAAAr4NHbQAAAAD18ezaAAAAAC5PbzwAAAAAA3gyPQAAAABdThpbAAAAADo+EOgAAAAAIzzALAAAAACmT8oEAAAAAIdvmScAAAAAWi3kQgAAAAA3Y8JQAAAAAKvFxrsAAAAAtjx3AgAAAACxZyDGAAAAAMsolLAAAAAAIGNeQAAAAAA3/8AuAAAAAILty5YAAAAANZSksgAAAAAA1vvJAAAAAJSPF5kAAAAAw57qCQAAAABgpnIHAAAAAPFd/EoAAAAAInH0DgAAAABMy9xzAAAAAOZtFlMAAAAAmldKOQAAAAB5/aofAAAAADAIsywAAAAAnsUS8AAAAAAUuQZGAAAAAAu8oDkAAAAAqvxZgQAAAACSiDrPAAAAAFx5PUQAAAAAqp8DmgAAAACGA3nKAAAAAGV/oVYA
AAAAcljhjAAAAAB8PQ+jAAAAAEV+qF4AAAAANMzXTgAAAAAQZqxfAAAAAN60E00AAAAAOG0aAQAAAACCjtNyAAAAAH1BwuAAAAAABNjhCAAAAACFjlG3AAAAAMasoRoAAAAAkKLWjAAAAAAi2G9hAAAAAOPWDZ8AAAAAtqlkLgAAAABo3ad/AAAAAD64FmsAAAAAvC/PSgAAAAA9Hb+xAAAAAK9MAskAAAAAl+XMQgAAAABx39E0AAAAACbIXnwAAAAAJVZ04QAAAABY6X6gAAAAAK3YOHAAAAAAMzZT8QAAAACTuyyGAAAAAL4ATmEAAAAAxd584AAAAACsP8aBAAAAAGtx69oAAAAA9GzMGQAAAADWDVLzAAAAALh9Dl4AAAAAM0x7cQAAAAAD8hzQAAAAAKRxgnsAAAAAYNAHkgAAAAASg9DNAAAAAFVlqe0AAAAAxNytYgAAAADgp8pcAAAAAPJV590AAAAAI08FAAAAAADuVLlZAAAAAAZVpJUAAAAAUeHEowAAAACTWRPMAAAAAIzCkycAAAAANuimLAAAAABrR25WAAAAAAypjtkAAAAA99Xa0QAAAADT7D44AAAAABqQvrcAAAAAlpMhbgAAAAAJeSMxAAAAAHekH1oAAAAAlRRHCgAAAAAjWM/eAAAAAJchAdsAAAAA505bTAAAAABFJtFuAAAAAExN/+EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 135 |
+
"cuda_rng_state": "kRard2YmFAAAAAAAAAAAAA=="
|
| 136 |
+
}
|