{
  "next_trial_id": 11,
  "trials": {
    "0": {
      "trial_id": 0,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 4000,
        "val_games": 500,
        "min_ply": 10,
        "total_steps": 100,
        "batch_size": 64,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.05,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": null,
        "log_interval": 20,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "float16",
        "no_compile": false,
        "sdpa_math": true,
        "num_workers": 0,
        "device": "cuda",
        "log_dir": "/workspace/logs/trial_0000",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": null,
        "bottleneck_dim": 16,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0000/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 4000,
        "val_games": 500,
        "min_ply": 10,
        "total_steps": 100,
        "batch_size": 64,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.05,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": null,
        "log_interval": 20,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "float16",
        "no_compile": false,
        "sdpa_math": true,
        "num_workers": 0,
        "device": "cuda",
        "log_dir": "/workspace/logs/trial_0000",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": null,
        "bottleneck_dim": 16,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "completed",
      "pid": 60913,
      "gpu_id": 0,
      "start_time": 1775346146.9661582,
      "end_time": 1775346192.2253375,
      "current_step": 100,
      "total_steps": 100,
      "steps_per_sec": 2.499625056241564,
      "last_train_loss": 2.6580834102494304,
      "last_train_acc": 0.2578537154243756,
      "best_val_loss": 2.6815053782271594,
      "best_accuracy": 0.25373134694665184,
      "actual_param_count": 262144,
      "log_path": "/workspace/sweep_results/trial_0000.log",
      "run_dir": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison",
      "optuna_number": null,
      "notes": "MCP smoke test: 256k bottleneck, 100 steps, MATH+compile+fp16",
      "tags": [
        "mcp-test",
        "bottleneck-small"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "17h18m"
    },
    "1": {
      "trial_id": 1,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 4000,
        "val_games": 500,
        "min_ply": 10,
        "total_steps": 30,
        "batch_size": 64,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.05,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": null,
        "log_interval": 20,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "float16",
        "no_compile": false,
        "sdpa_math": true,
        "num_workers": 0,
        "device": "cuda",
        "log_dir": "/workspace/logs/trial_0000",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison/checkpoints/best",
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": null,
        "bottleneck_dim": 16,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0001/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 4000,
        "val_games": 500,
        "min_ply": 10,
        "total_steps": 30,
        "batch_size": 64,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.05,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": null,
        "log_interval": 20,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "float16",
        "no_compile": false,
        "sdpa_math": true,
        "num_workers": 0,
        "device": "cuda",
        "log_dir": "/workspace/logs/trial_0000",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison/checkpoints/best",
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": null,
        "bottleneck_dim": 16,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "killed",
      "pid": 61357,
      "gpu_id": 0,
      "start_time": 1775346206.78868,
      "end_time": 1775346209.3660913,
      "current_step": 0,
      "total_steps": 30,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0001.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "mcp-test",
        "bottleneck-small"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "17h17m"
    },
    "2": {
      "trial_id": 2,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 3500000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 50,
        "pause_after_steps": 20000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": false,
        "sdpa_math": true,
        "num_workers": 4,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0002/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 3500000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 50,
        "pause_after_steps": 20000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": false,
        "sdpa_math": true,
        "num_workers": 4,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "killed",
      "pid": 71057,
      "gpu_id": 0,
      "start_time": 1775351012.9564307,
      "end_time": 1775351080.0435607,
      "current_step": 0,
      "total_steps": 200000,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0002.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "18h-push",
        "scaling",
        "20M"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h57m"
    },
    "3": {
      "trial_id": 3,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 50,
        "pause_after_steps": 20000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": false,
        "sdpa_math": true,
        "num_workers": 4,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0003/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 50,
        "pause_after_steps": 20000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": false,
        "sdpa_math": true,
        "num_workers": 4,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "killed",
      "pid": 72205,
      "gpu_id": 0,
      "start_time": 1775351111.2472177,
      "end_time": 1775351835.388752,
      "current_step": 0,
      "total_steps": 200000,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0003.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "18h-push",
        "scaling",
        "20M",
        "full-data"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h55m"
    },
    "4": {
      "trial_id": 4,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 50,
        "pause_after_steps": 15000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 4,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0004/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 50,
        "pause_after_steps": 15000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 4,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "killed",
      "pid": 73543,
      "gpu_id": 0,
      "start_time": 1775351848.6815178,
      "end_time": 1775352133.8517451,
      "current_step": 0,
      "total_steps": 200000,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0004.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "18h-push",
        "scaling",
        "20M",
        "eager"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h43m"
    },
    "5": {
      "trial_id": 5,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 50,
        "pause_after_steps": 15000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0005/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 50,
        "pause_after_steps": 15000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "killed",
      "pid": 74605,
      "gpu_id": 0,
      "start_time": 1775352154.8011737,
      "end_time": 1775352288.1328025,
      "current_step": 0,
      "total_steps": 200000,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0005.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "18h-push",
        "scaling",
        "20M",
        "workers2"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h38m"
    },
    "6": {
      "trial_id": 6,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 5000,
        "min_ply": 10,
        "total_steps": 100,
        "batch_size": 64,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 20,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0006/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 5000,
        "min_ply": 10,
        "total_steps": 100,
        "batch_size": 64,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 20,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "completed",
      "pid": 75207,
      "gpu_id": 0,
      "start_time": 1775352312.4274437,
      "end_time": 1775352342.5313256,
      "current_step": 0,
      "total_steps": 100,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0006.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "diagnostic",
        "small-bs"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h35m"
    },
    "7": {
      "trial_id": 7,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 5000,
        "min_ply": 10,
        "total_steps": 100,
        "batch_size": 128,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 20,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0007/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 5000,
        "min_ply": 10,
        "total_steps": 100,
        "batch_size": 128,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 20,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "completed",
      "pid": 75775,
      "gpu_id": 0,
      "start_time": 1775352384.985535,
      "end_time": 1775352420.1244109,
      "current_step": 0,
      "total_steps": 100,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0007.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "diagnostic",
        "bs128"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h34m"
    },
    "8": {
      "trial_id": 8,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 5000,
        "min_ply": 10,
        "total_steps": 50,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 10,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0008/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 5000,
        "min_ply": 10,
        "total_steps": 50,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 10,
        "pause_after_steps": null,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "completed",
      "pid": 76342,
      "gpu_id": 0,
      "start_time": 1775352440.4786208,
      "end_time": 1775352475.6319711,
      "current_step": 0,
      "total_steps": 50,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0008.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "diagnostic",
        "bs256"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h33m"
    },
    "9": {
      "trial_id": 9,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 10,
        "pause_after_steps": 15000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0009/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 10,
        "pause_after_steps": 15000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "killed",
      "pid": 77221,
      "gpu_id": 0,
      "start_time": 1775352573.273321,
      "end_time": 1775352817.925155,
      "current_step": 0,
      "total_steps": 200000,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0009.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "18h-push",
        "scaling",
        "20M",
        "bs256"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h31m"
    },
    "10": {
      "trial_id": 10,
      "strategy": "bottleneck",
      "params": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 100,
        "pause_after_steps": 15000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "cli_command": [
        "python3",
        "/opt/pawn/scripts/train.py",
        "--config",
        "/workspace/logs/trial_0010/run_config.json"
      ],
      "config": {
        "elo_min": 1800,
        "elo_max": 1900,
        "max_games": 30000000,
        "val_games": 50000,
        "min_ply": 10,
        "total_steps": 200000,
        "batch_size": 256,
        "lr": 0.0003,
        "weight_decay": 0.0,
        "warmup_frac": 0.02,
        "warmup_steps": null,
        "max_grad_norm": 1.0,
        "patience": 9999,
        "eval_interval": 2500,
        "log_interval": 100,
        "pause_after_steps": 15000,
        "mate_boost": 0.0,
        "no_outcome_token": false,
        "discard_ply_limit": false,
        "amp_dtype": "bfloat16",
        "no_compile": true,
        "sdpa_math": true,
        "num_workers": 2,
        "device": "cuda",
        "log_dir": "/workspace/logs",
        "hf_repo": null,
        "local_checkpoints": true,
        "resume": null,
        "wandb": false,
        "cache_dir": "/dev/shm/pawn_cache",
        "run_type": "adapter",
        "strategy": "bottleneck",
        "checkpoint": "thomas-schweich/pawn-base",
        "pgn": "thomas-schweich/pawn-lichess-full",
        "adapter_layers": "4,5,6,7",
        "bottleneck_dim": 2440,
        "no_adapt_attn": false,
        "no_adapt_ffn": false,
        "lora_rank": null,
        "lora_targets": null,
        "lora_ffn": false,
        "density": null,
        "sparse_targets": null,
        "sparse_ffn": false,
        "use_output_film": false,
        "rosa_mode": null,
        "rosa_warmup_steps": 128,
        "mask_samples": 32,
        "grad_alpha": 2,
        "unfreeze_layers": null,
        "d_model": null,
        "n_layers": null,
        "n_heads": null,
        "epochs": 9999,
        "val_every": 9999
      },
      "status": "completed",
      "pid": 78502,
      "gpu_id": 0,
      "start_time": 1775352861.543256,
      "end_time": 1775356243.7400186,
      "current_step": 0,
      "total_steps": 200000,
      "steps_per_sec": 0.0,
      "last_train_loss": null,
      "last_train_acc": null,
      "best_val_loss": null,
      "best_accuracy": null,
      "actual_param_count": null,
      "log_path": "/workspace/sweep_results/trial_0010.log",
      "run_dir": null,
      "optuna_number": null,
      "notes": "",
      "tags": [
        "18h-push",
        "20M",
        "bottleneck",
        "scaling"
      ],
      "eta_seconds": null,
      "eta_human": "?",
      "elapsed_human": "15h26m"
    }
  },
  "event_seq": 27,
  "start_time": 1775342775.318943,
  "cost_per_hour": 2.49
}