Spaces:
Running
Running
Deploy v6.0-genesis from GitHub main
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff.
- .dockerignore +17 -17
- .gitattributes +15 -0
- .gitignore +46 -14
- Dockerfile +32 -32
- Dockerfile.damocles +4 -4
- FINAL_SUBMIT/ALL_250_FEATURES_LIVE_PROOF.md +3 -3
- FINAL_SUBMIT/ARCHITECTURE.md +3 -3
- FINAL_SUBMIT/BENCHMARK_REPORT.md +3 -3
- FINAL_SUBMIT/COLD_OPEN_OPENING_LINES.md +26 -26
- FINAL_SUBMIT/DATASET_CARD.md +3 -3
- FINAL_SUBMIT/ENV_CARD.md +1 -1
- FINAL_SUBMIT/FEATURE_INVENTORY.md +27 -27
- FINAL_SUBMIT/FEATURE_INVENTORY_DI.md +7 -7
- FINAL_SUBMIT/FEATURE_INVENTORY_JT.md +13 -13
- FINAL_SUBMIT/FEATURE_INVENTORY_UBB.md +40 -40
- FINAL_SUBMIT/HACKATHON_README.md +2 -2
- FINAL_SUBMIT/JUDGE_FAQ_30.md +1 -1
- FINAL_SUBMIT/JUDGE_OBJECTION_HANDBOOK.md +1 -1
- FINAL_SUBMIT/MASTER_FEATURE_USECASE_MAP_250.md +10 -10
- FINAL_SUBMIT/README.md +12 -12
- FINAL_SUBMIT/RELIANCE_HORMUZ_DEEP_DIVE.md +1 -1
- FINAL_SUBMIT/REPRODUCE.md +2 -2
- FINAL_SUBMIT/REPRODUCE_ONE_BASH.sh +3 -3
- FINAL_SUBMIT/RL_GUIDE_59POINT_ALIGNMENT.md +2 -2
- FINAL_SUBMIT/docker/Dockerfile.api +25 -0
- FINAL_SUBMIT/docker/docker-compose.yml +41 -0
- FINAL_SUBMIT/receipts/F2_multi_agent_apple_samsung_toyota.json +116 -116
- FINAL_SUBMIT/receipts/ONNX_BUNDLE_MANIFEST.json +71 -71
- FINAL_SUBMIT/receipts/R2_SHAP_FAIRNESS_CALIBRATION.json +501 -501
- FINAL_SUBMIT/receipts/R3_BIGTFT_INTEGRATION.json +51 -51
- FINAL_SUBMIT/receipts/R3_PAST_SELF.json +0 -0
- FINAL_SUBMIT/receipts/R3_STACKING_V2.json +1187 -1187
- FINAL_SUBMIT/receipts/R3_STACKING_V3_POINTLEVEL.json +226 -226
- FINAL_SUBMIT/receipts/R3_TIMESFM_QUANTILE.json +129 -129
- FINAL_SUBMIT/receipts/R4_DANGEROUS_V2.json +0 -0
- FINAL_SUBMIT/receipts/R4_DANGEROUS_V2_ABLATION.json +396 -396
- FINAL_SUBMIT/receipts/R4_DANGEROUS_V2_LIVE.json +63 -63
- FINAL_SUBMIT/receipts/R4_FRONTIER_PANEL_V2.json +0 -0
- FINAL_SUBMIT/receipts/R5_BEIR_MANUAL.json +1022 -1022
- FINAL_SUBMIT/receipts/R5_GRANITE.json +0 -0
- FINAL_SUBMIT/receipts/R5_GRANITE_HARD.json +0 -0
- FINAL_SUBMIT/receipts/R6_ALGO_COMPARISON.json +71 -71
- FINAL_SUBMIT/receipts/R6_AQUA_REGIA_V2.json +859 -859
- FINAL_SUBMIT/receipts/R6_GETHSEMANE.json +121 -121
- FINAL_SUBMIT/receipts/R6_GETHSEMANE_ONNX_EXPORT.json +24 -24
- FINAL_SUBMIT/receipts/R6_PROVIDER_V2.json +329 -329
- FINAL_SUBMIT/receipts/R6_PROVIDER_v1_F1.json +1755 -1755
- FINAL_SUBMIT/receipts/ablation_matrix.json +94 -94
- FINAL_SUBMIT/receipts/adversarial_20_attack_gauntlet.json +216 -216
- FINAL_SUBMIT/receipts/adversarial_reward_audit.json +131 -131
.dockerignore
CHANGED
|
@@ -1,17 +1,17 @@
|
|
| 1 |
-
.git
|
| 2 |
-
__pycache__
|
| 3 |
-
*.pyc
|
| 4 |
-
.pytest_cache
|
| 5 |
-
.mypy_cache
|
| 6 |
-
.ruff_cache
|
| 7 |
-
tests/
|
| 8 |
-
.env
|
| 9 |
-
.env.*
|
| 10 |
-
*.egg-info
|
| 11 |
-
dist/
|
| 12 |
-
build/
|
| 13 |
-
.vscode/
|
| 14 |
-
.idea/
|
| 15 |
-
*.md
|
| 16 |
-
!README.md
|
| 17 |
-
LICENSE
|
|
|
|
| 1 |
+
.git
|
| 2 |
+
__pycache__
|
| 3 |
+
*.pyc
|
| 4 |
+
.pytest_cache
|
| 5 |
+
.mypy_cache
|
| 6 |
+
.ruff_cache
|
| 7 |
+
tests/
|
| 8 |
+
.env
|
| 9 |
+
.env.*
|
| 10 |
+
*.egg-info
|
| 11 |
+
dist/
|
| 12 |
+
build/
|
| 13 |
+
.vscode/
|
| 14 |
+
.idea/
|
| 15 |
+
*.md
|
| 16 |
+
!README.md
|
| 17 |
+
LICENSE
|
.gitattributes
CHANGED
|
@@ -68,3 +68,18 @@ ShAuRyA_Phoenix/autoresearch_fixed/experiments/seed1000_candidate/policy.zip fil
|
|
| 68 |
ShAuRyA_Phoenix/autoresearch_fixed/experiments/seed1001_candidate/policy.zip filter=lfs diff=lfs merge=lfs -text
|
| 69 |
FINAL_SUBMIT/plots/real_reinforce_curve.png filter=lfs diff=lfs merge=lfs -text
|
| 70 |
FINAL_SUBMIT/plots/real_reinforce_curve_v2.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
ShAuRyA_Phoenix/autoresearch_fixed/experiments/seed1001_candidate/policy.zip filter=lfs diff=lfs merge=lfs -text
|
| 69 |
FINAL_SUBMIT/plots/real_reinforce_curve.png filter=lfs diff=lfs merge=lfs -text
|
| 70 |
FINAL_SUBMIT/plots/real_reinforce_curve_v2.png filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
versions/v3_arcadia/plots/aqua_regia/r6_aqua_regia.png filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
versions/v3_arcadia/plots/dangerous/r4_summary.png filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
versions/v3_arcadia/plots/dangerous/r4v2_heatmap.png filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
versions/v3_arcadia/plots/gethsemane/learning_curves.png filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
versions/v3_arcadia/plots/granite/r5_per_query_heatmap.png filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
versions/v3_arcadia/plots/hero_result_card.png filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
versions/v3_arcadia/plots/past_self/r3_summary.png filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
versions/v4_arcadia_live/features/gcn_attn/gcn_attn_easy_graph.png filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
versions/v4_arcadia_live/features/gcn_attn/gcn_attn_hard_graph.png filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
versions/v4_arcadia_live/features/gcn_attn/gcn_attn_medium_graph.png filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
versions/v4_arcadia_live/scenarios/crisis_library_v2.faiss filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
versions/v4_arcadia_live/scenarios/crisis_library_v2_emb.npz filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
versions/v5_phoenix/action_v2/conformal_calibrated.pt filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
versions/v5_phoenix/autoresearch_fixed/experiments/seed1000_candidate/policy.zip filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
versions/v5_phoenix/autoresearch_fixed/experiments/seed1001_candidate/policy.zip filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
|
@@ -12,6 +12,11 @@ env/
|
|
| 12 |
|
| 13 |
# Tooling state
|
| 14 |
.claude/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Stray pip version artifacts
|
| 17 |
0.*/
|
|
@@ -56,21 +61,48 @@ models/
|
|
| 56 |
sota-bundle/
|
| 57 |
external_data/
|
| 58 |
catboost_info/
|
| 59 |
-
v3_arcadia/tools/
|
| 60 |
-
v3_arcadia/gguf_out/
|
| 61 |
|
| 62 |
# Auto-generated embedding caches + SB3 best/ dirs
|
| 63 |
-
v3_arcadia/checkpoints/granite/corpus_emb_*.npy
|
| 64 |
-
v3_arcadia/checkpoints/gethsemane/best_*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
# v4 arcadia-live auto-generated state
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# Tooling state
|
| 14 |
.claude/
|
| 15 |
+
.agents/
|
| 16 |
+
.source_cache/
|
| 17 |
+
.tmp_pytest/
|
| 18 |
+
.pytest_cache/
|
| 19 |
+
wandb/
|
| 20 |
|
| 21 |
# Stray pip version artifacts
|
| 22 |
0.*/
|
|
|
|
| 61 |
sota-bundle/
|
| 62 |
external_data/
|
| 63 |
catboost_info/
|
| 64 |
+
versions/v3_arcadia/tools/
|
| 65 |
+
versions/v3_arcadia/gguf_out/
|
| 66 |
|
| 67 |
# Auto-generated embedding caches + SB3 best/ dirs
|
| 68 |
+
versions/v3_arcadia/checkpoints/granite/corpus_emb_*.npy
|
| 69 |
+
versions/v3_arcadia/checkpoints/gethsemane/best_*/
|
| 70 |
+
|
| 71 |
+
# Third-party source checkouts (not our code) — vendored under vendor/
|
| 72 |
+
vendor/
|
| 73 |
+
|
| 74 |
+
# Phoenix v5 auto-generated state (keep source code, exclude heavy + auto-gen)
|
| 75 |
+
versions/v5_phoenix/.venv-roll/
|
| 76 |
+
versions/v5_phoenix/.venv/
|
| 77 |
+
versions/v5_phoenix/experiments/dpo_judge_v1/checkpoints/
|
| 78 |
+
versions/v5_phoenix/experiments/dpo_judge_v1/adapter/
|
| 79 |
+
versions/v5_phoenix/roll_integration/dpo_judge/adapter/
|
| 80 |
+
versions/v5_phoenix/**/__pycache__/
|
| 81 |
+
versions/v5_phoenix/**/*.pyc
|
| 82 |
+
versions/v5_phoenix/**/*.log
|
| 83 |
+
versions/v5_phoenix/receipts_v2/*.stdout
|
| 84 |
|
| 85 |
# v4 arcadia-live auto-generated state
|
| 86 |
+
versions/v4_arcadia_live/realtime/events.db
|
| 87 |
+
versions/v4_arcadia_live/realtime/events.db-journal
|
| 88 |
+
versions/v4_arcadia_live/realtime/library_embeddings.pkl
|
| 89 |
+
versions/v4_arcadia_live/realtime/vessel_snapshot_hormuz.json
|
| 90 |
+
versions/v4_arcadia_live/autoresearch/experiments/
|
| 91 |
+
versions/v4_arcadia_live/autoresearch/stop_autoresearch.flag
|
| 92 |
+
versions/v4_arcadia_live/autoresearch/candidate_train.py.bak
|
| 93 |
+
# Lab notebook, rejected log, and state.json ARE committed — they document
|
| 94 |
+
# real autoresearch execution history (provenance for judges).
|
| 95 |
+
|
| 96 |
+
# OpenRouter usage audit log (per-call timestamps, no keys)
|
| 97 |
+
.openrouter_usage.jsonl
|
| 98 |
+
# Frontier panel run intermediate caches
|
| 99 |
+
.openrouter_cache/
|
| 100 |
+
lora_stdout.log
|
| 101 |
+
|
| 102 |
+
# Pass 8 — large harvest data (regenerable via train.py harvest_trajectories)
|
| 103 |
+
versions/v5_phoenix/experiments/rap_xc_v1/transitions.npz
|
| 104 |
+
versions/v5_phoenix/experiments/rap_xc_v1/transitions_synth.npz
|
| 105 |
+
versions/v5_phoenix/experiments/rap_xc_v1/smoke*.npz
|
| 106 |
+
versions/v5_phoenix/experiments/rap_xc_v1/rapxc_synth.pt
|
| 107 |
+
versions/v5_phoenix/experiments/rap_xc_v1/*.log
|
| 108 |
+
tests/receipts/*.log
|
Dockerfile
CHANGED
|
@@ -1,32 +1,32 @@
|
|
| 1 |
-
# ── Stage 1: Install dependencies ──────────────────────────────────
|
| 2 |
-
FROM python:3.11-slim AS builder
|
| 3 |
-
|
| 4 |
-
WORKDIR /build
|
| 5 |
-
COPY requirements.txt .
|
| 6 |
-
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
|
| 7 |
-
|
| 8 |
-
# ── Stage 2: Production image ─────────────────────────────────────
|
| 9 |
-
FROM python:3.11-slim
|
| 10 |
-
|
| 11 |
-
# Non-root user for security (UID 1000 is conventional)
|
| 12 |
-
RUN useradd --create-home --uid 1000 appuser
|
| 13 |
-
|
| 14 |
-
WORKDIR /app
|
| 15 |
-
|
| 16 |
-
# Copy installed packages from builder
|
| 17 |
-
COPY --from=builder /install /usr/local
|
| 18 |
-
|
| 19 |
-
# Copy application code
|
| 20 |
-
COPY . .
|
| 21 |
-
|
| 22 |
-
# Own the app directory
|
| 23 |
-
RUN chown -R appuser:appuser /app
|
| 24 |
-
|
| 25 |
-
USER appuser
|
| 26 |
-
|
| 27 |
-
EXPOSE 8000
|
| 28 |
-
|
| 29 |
-
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
| 30 |
-
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
| 31 |
-
|
| 32 |
-
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
|
|
| 1 |
+
# ── Stage 1: Install dependencies ──────────────────────────────────
|
| 2 |
+
FROM python:3.11-slim AS builder
|
| 3 |
+
|
| 4 |
+
WORKDIR /build
|
| 5 |
+
COPY requirements.txt .
|
| 6 |
+
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
|
| 7 |
+
|
| 8 |
+
# ── Stage 2: Production image ─────────────────────────────────────
|
| 9 |
+
FROM python:3.11-slim
|
| 10 |
+
|
| 11 |
+
# Non-root user for security (UID 1000 is conventional)
|
| 12 |
+
RUN useradd --create-home --uid 1000 appuser
|
| 13 |
+
|
| 14 |
+
WORKDIR /app
|
| 15 |
+
|
| 16 |
+
# Copy installed packages from builder
|
| 17 |
+
COPY --from=builder /install /usr/local
|
| 18 |
+
|
| 19 |
+
# Copy application code
|
| 20 |
+
COPY . .
|
| 21 |
+
|
| 22 |
+
# Own the app directory
|
| 23 |
+
RUN chown -R appuser:appuser /app
|
| 24 |
+
|
| 25 |
+
USER appuser
|
| 26 |
+
|
| 27 |
+
EXPOSE 8000
|
| 28 |
+
|
| 29 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
| 30 |
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
| 31 |
+
|
| 32 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
Dockerfile.damocles
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# SupplyMind v3.0-arcadia — Damocles API (FastAPI)
|
| 2 |
-
# Deploys v3_arcadia/90_damocles/app.py with /assess, /forecast, /rag, /rl/act, /health
|
| 3 |
#
|
| 4 |
# Build: docker build -f Dockerfile.damocles -t supplymind-damocles:v3.0-arcadia .
|
| 5 |
# Run: docker run -p 8765:8765 supplymind-damocles:v3.0-arcadia
|
|
@@ -18,10 +18,10 @@ COPY requirements-damocles.txt .
|
|
| 18 |
RUN pip install --no-cache-dir -r requirements-damocles.txt
|
| 19 |
|
| 20 |
# App code
|
| 21 |
-
COPY v3_arcadia/90_damocles/ /app/v3_arcadia/90_damocles/
|
| 22 |
-
COPY v3_arcadia/checkpoints/granite/corpus_chunks.pkl /app/v3_arcadia/checkpoints/granite/
|
| 23 |
# Embeddings loaded lazily from cached .npy (mounted at runtime or fetched via env)
|
| 24 |
-
COPY v3_arcadia/checkpoints/gethsemane/ppo_easy_typhoon_response.onnx /app/v3_arcadia/checkpoints/gethsemane/
|
| 25 |
COPY models/mxbai-embed-large/ /app/models/mxbai-embed-large/
|
| 26 |
|
| 27 |
# Healthcheck
|
|
|
|
| 1 |
# SupplyMind v3.0-arcadia — Damocles API (FastAPI)
|
| 2 |
+
# Deploys versions/v3_arcadia/90_damocles/app.py with /assess, /forecast, /rag, /rl/act, /health
|
| 3 |
#
|
| 4 |
# Build: docker build -f Dockerfile.damocles -t supplymind-damocles:v3.0-arcadia .
|
| 5 |
# Run: docker run -p 8765:8765 supplymind-damocles:v3.0-arcadia
|
|
|
|
| 18 |
RUN pip install --no-cache-dir -r requirements-damocles.txt
|
| 19 |
|
| 20 |
# App code
|
| 21 |
+
COPY versions/v3_arcadia/90_damocles/ /app/versions/v3_arcadia/90_damocles/
|
| 22 |
+
COPY versions/v3_arcadia/checkpoints/granite/corpus_chunks.pkl /app/versions/v3_arcadia/checkpoints/granite/
|
| 23 |
# Embeddings loaded lazily from cached .npy (mounted at runtime or fetched via env)
|
| 24 |
+
COPY versions/v3_arcadia/checkpoints/gethsemane/ppo_easy_typhoon_response.onnx /app/versions/v3_arcadia/checkpoints/gethsemane/
|
| 25 |
COPY models/mxbai-embed-large/ /app/models/mxbai-embed-large/
|
| 26 |
|
| 27 |
# Healthcheck
|
FINAL_SUBMIT/ALL_250_FEATURES_LIVE_PROOF.md
CHANGED
|
@@ -25,9 +25,9 @@ Status legend:
|
|
| 25 |
| A7 | 30-step horizon | `server/supply_environment.py` | reset config | ✅ |
|
| 26 |
| A8 | $5–15M budget tasks | `data/disruptions.json` | task manifest | ✅ |
|
| 27 |
| A9 | TSMC/Samsung coords | `data/companies_real.json` | n_real_nodes=40 | ✅ |
|
| 28 |
-
| A10 | 8-event crisis library v1 | `
|
| 29 |
-
| A11 | Wordle RLVR mini-env | `
|
| 30 |
-
| A12 | RLVE adaptive curriculum | `
|
| 31 |
|
| 32 |
## B · Reward engineering (14) — 14/14 ✅
|
| 33 |
|
|
|
|
| 25 |
| A7 | 30-step horizon | `server/supply_environment.py` | reset config | ✅ |
|
| 26 |
| A8 | $5–15M budget tasks | `data/disruptions.json` | task manifest | ✅ |
|
| 27 |
| A9 | TSMC/Samsung coords | `data/companies_real.json` | n_real_nodes=40 | ✅ |
|
| 28 |
+
| A10 | 8-event crisis library v1 | `versions/v4_arcadia_live/realtime/crisis_library.py` | 8 events indexed | ✅ |
|
| 29 |
+
| A11 | Wordle RLVR mini-env | `versions/v5_phoenix/wordle_env/env.py` | `wordle_real_reinforce_v2_curve.json` | ✅ |
|
| 30 |
+
| A12 | RLVE adaptive curriculum | `versions/v5_phoenix/wordle_env/rlve_curriculum.py` | `rlve_curriculum_smoke.json` | ✅ |
|
| 31 |
|
| 32 |
## B · Reward engineering (14) — 14/14 ✅
|
| 33 |
|
FINAL_SUBMIT/ARCHITECTURE.md
CHANGED
|
@@ -90,7 +90,7 @@
|
|
| 90 |
|
| 91 |
Plus 4 base wrappers: `qwen25-14b-local`, `qwen25-coder-local`, `mistral-nemo-local`, `deepseek-r1-local-q4`.
|
| 92 |
|
| 93 |
-
5 Modelfiles committed at `rl/lora/Modelfile`, `Modelfile.v2-v4`, `
|
| 94 |
|
| 95 |
### 4. LoRA fine-tuning track
|
| 96 |
|
|
@@ -100,7 +100,7 @@ Qwen-2.5-1.5B → PEFT/LoRA → 4-bit NF4 (bitsandbytes) → TRL → 225 instruc
|
|
| 100 |
|
| 101 |
Qwen-2.5-3B-Instruct base. 21 preference pairs from R4 ground truth at `dpo_judge/data/preference_pairs.jsonl`. DPO sigmoid loss, β=0.1, LoRA r=8 / α=16, hf strategy (single-GPU 12GB), per_device_train_batch_size=1, gradient_accumulation_steps=4, lr=5e-5, save_adapter_only.
|
| 102 |
|
| 103 |
-
5 trainers in `
|
| 104 |
- `train_dpo_trl.py` — TRL standalone (ROLL-free fallback)
|
| 105 |
- `train_dpo_roll.py` — ROLL-integrated
|
| 106 |
- `train_grpo_env.py` — GRPO multi-turn
|
|
@@ -178,7 +178,7 @@ dag_feats (80-d) ──→ DAGEncoder
|
|
| 178 |
|
| 179 |
### 14. Live data layer (20 sources)
|
| 180 |
|
| 181 |
-
`
|
| 182 |
|
| 183 |
NewsAPI · GDELT · GDELT-Conflict · GDELT-Humanitarian · USGS earthquakes · NOAA NDBC buoys · NOAA Tides · NASA EONET · NASA FIRMS fires · EIA Brent · EIA WTI · EIA natgas · MarineTraffic AIS · Global Fishing Watch · World Bank commodities · WHO DON · SEC EDGAR · CISA KEV · OFAC sanctions · Wikipedia pageviews · HN tech ticker
|
| 184 |
|
|
|
|
| 90 |
|
| 91 |
Plus 4 base wrappers: `qwen25-14b-local`, `qwen25-coder-local`, `mistral-nemo-local`, `deepseek-r1-local-q4`.
|
| 92 |
|
| 93 |
+
5 Modelfiles committed at `rl/lora/Modelfile`, `Modelfile.v2-v4`, `versions/v4_arcadia_live/features/Modelfile.analyst_v5`.
|
| 94 |
|
| 95 |
### 4. LoRA fine-tuning track
|
| 96 |
|
|
|
|
| 100 |
|
| 101 |
Qwen-2.5-3B-Instruct base. 21 preference pairs from R4 ground truth at `dpo_judge/data/preference_pairs.jsonl`. DPO sigmoid loss, β=0.1, LoRA r=8 / α=16, hf strategy (single-GPU 12GB), per_device_train_batch_size=1, gradient_accumulation_steps=4, lr=5e-5, save_adapter_only.
|
| 102 |
|
| 103 |
+
5 trainers in `versions/v5_phoenix/roll_integration/dpo_judge/`:
|
| 104 |
- `train_dpo_trl.py` — TRL standalone (ROLL-free fallback)
|
| 105 |
- `train_dpo_roll.py` — ROLL-integrated
|
| 106 |
- `train_grpo_env.py` — GRPO multi-turn
|
|
|
|
| 178 |
|
| 179 |
### 14. Live data layer (20 sources)
|
| 180 |
|
| 181 |
+
`versions/v4_arcadia_live/realtime/orchestrator_v2.py` fans out to 20 sources via ThreadPoolExecutor with per-source timeouts and graceful failure:
|
| 182 |
|
| 183 |
NewsAPI · GDELT · GDELT-Conflict · GDELT-Humanitarian · USGS earthquakes · NOAA NDBC buoys · NOAA Tides · NASA EONET · NASA FIRMS fires · EIA Brent · EIA WTI · EIA natgas · MarineTraffic AIS · Global Fishing Watch · World Bank commodities · WHO DON · SEC EDGAR · CISA KEV · OFAC sanctions · Wikipedia pageviews · HN tech ticker
|
| 184 |
|
FINAL_SUBMIT/BENCHMARK_REPORT.md
CHANGED
|
@@ -69,7 +69,7 @@ Tested on 32k held-out training rows of real harvested transitions. The split-co
|
|
| 69 |
|
| 70 |
## 4. RAP-XC training on real harvest
|
| 71 |
|
| 72 |
-
`
|
| 73 |
|
| 74 |
| Metric | Result |
|
| 75 |
|---|---|
|
|
@@ -83,7 +83,7 @@ Tested on 32k held-out training rows of real harvested transitions. The split-co
|
|
| 83 |
|
| 84 |
## 5. HetTemporalGAT vs v1 GCN cascade
|
| 85 |
|
| 86 |
-
`
|
| 87 |
|
| 88 |
Task: arrival-time regression on R6 cascade graphs (real semiconductor supply-chain).
|
| 89 |
|
|
@@ -111,7 +111,7 @@ Strong cross-corpus stability — same panel produces near-identical α on indep
|
|
| 111 |
|
| 112 |
## 7. Tohoku 2011 Platinum counterfactual replication
|
| 113 |
|
| 114 |
-
`
|
| 115 |
|
| 116 |
| Metric | Value |
|
| 117 |
|---|---|
|
|
|
|
| 69 |
|
| 70 |
## 4. RAP-XC training on real harvest
|
| 71 |
|
| 72 |
+
`versions/v5_phoenix/rap_xc/train.py` → `versions/v5_phoenix/experiments/rap_xc_v1/rapxc.pt`
|
| 73 |
|
| 74 |
| Metric | Result |
|
| 75 |
|---|---|
|
|
|
|
| 83 |
|
| 84 |
## 5. HetTemporalGAT vs v1 GCN cascade
|
| 85 |
|
| 86 |
+
`versions/v5_phoenix/gnn_v2/train_hetgat.py` → `versions/v5_phoenix/experiments/hetgat_v1/report.json`
|
| 87 |
|
| 88 |
Task: arrival-time regression on R6 cascade graphs (real semiconductor supply-chain).
|
| 89 |
|
|
|
|
| 111 |
|
| 112 |
## 7. Tohoku 2011 Platinum counterfactual replication
|
| 113 |
|
| 114 |
+
`versions/v5_phoenix/counterfactual_v2/platinum.py` synthetic-control method on real Tohoku 2011 economic data.
|
| 115 |
|
| 116 |
| Metric | Value |
|
| 117 |
|---|---|
|
FINAL_SUBMIT/COLD_OPEN_OPENING_LINES.md
CHANGED
|
@@ -1,26 +1,26 @@
|
|
| 1 |
-
# COLD OPEN -- opening lines for judge pitch (<= 8 sec each)
|
| 2 |
-
|
| 3 |
-
## Three variants depending on judge persona
|
| 4 |
-
|
| 5 |
-
### A -- Technical depth judge (academic/research)
|
| 6 |
-
> "REINFORCE on Wordle: 100% solve rate, Wilcoxon p=1.87e-34, Cohen's d=3.89, 9.8 seconds on a single CPU thread. Same loop drives a 280-action supply-chain RL env with 1500-event EMDAT RAG corpus and conformal action filter at 0.9001 empirical coverage."
|
| 7 |
-
|
| 8 |
-
### B -- Industry pragmatist (engineer/PM)
|
| 9 |
-
> "If Hormuz closes tomorrow, India loses INR X-trillion in 30 days. Watch what one LLM, RL-trained, does about it -- live API calls, real EIA price data, real NASA fire feed, end-to-end in 7 seconds with a sha256 receipt for every claim."
|
| 10 |
-
|
| 11 |
-
### C -- Storyteller (DevRel/PM)
|
| 12 |
-
> "Most hackathon entries train on Wordle. We ALSO train on Wordle -- and use the same canonical loop on a real-world supply-chain crisis simulator with 9 live data feeds. One submission, all three hackathon themes, every claim sha256-replayable."
|
| 13 |
-
|
| 14 |
-
## Use-case map
|
| 15 |
-
|
| 16 |
-
| Persona | Likely panel weight | Use line |
|
| 17 |
-
|---|---|---|
|
| 18 |
-
| Academic/research | 40% (per VICTORY_CALCULUS) | A |
|
| 19 |
-
| Industry/PM | 35% | B |
|
| 20 |
-
| Storyteller/DevRel | 25% | C |
|
| 21 |
-
|
| 22 |
-
## Backup ultra-short variants (<= 4 sec)
|
| 23 |
-
|
| 24 |
-
- "100% solve, p=1e-34, 9.8 seconds, CPU only."
|
| 25 |
-
- "9 live APIs. 1500 events. 7-second war room."
|
| 26 |
-
- "Three themes. One env. Every claim hashed."
|
|
|
|
| 1 |
+
# COLD OPEN -- opening lines for judge pitch (<= 8 sec each)
|
| 2 |
+
|
| 3 |
+
## Three variants depending on judge persona
|
| 4 |
+
|
| 5 |
+
### A -- Technical depth judge (academic/research)
|
| 6 |
+
> "REINFORCE on Wordle: 100% solve rate, Wilcoxon p=1.87e-34, Cohen's d=3.89, 9.8 seconds on a single CPU thread. Same loop drives a 280-action supply-chain RL env with 1500-event EMDAT RAG corpus and conformal action filter at 0.9001 empirical coverage."
|
| 7 |
+
|
| 8 |
+
### B -- Industry pragmatist (engineer/PM)
|
| 9 |
+
> "If Hormuz closes tomorrow, India loses INR X-trillion in 30 days. Watch what one LLM, RL-trained, does about it -- live API calls, real EIA price data, real NASA fire feed, end-to-end in 7 seconds with a sha256 receipt for every claim."
|
| 10 |
+
|
| 11 |
+
### C -- Storyteller (DevRel/PM)
|
| 12 |
+
> "Most hackathon entries train on Wordle. We ALSO train on Wordle -- and use the same canonical loop on a real-world supply-chain crisis simulator with 9 live data feeds. One submission, all three hackathon themes, every claim sha256-replayable."
|
| 13 |
+
|
| 14 |
+
## Use-case map
|
| 15 |
+
|
| 16 |
+
| Persona | Likely panel weight | Use line |
|
| 17 |
+
|---|---|---|
|
| 18 |
+
| Academic/research | 40% (per VICTORY_CALCULUS) | A |
|
| 19 |
+
| Industry/PM | 35% | B |
|
| 20 |
+
| Storyteller/DevRel | 25% | C |
|
| 21 |
+
|
| 22 |
+
## Backup ultra-short variants (<= 4 sec)
|
| 23 |
+
|
| 24 |
+
- "100% solve, p=1e-34, 9.8 seconds, CPU only."
|
| 25 |
+
- "9 live APIs. 1500 events. 7-second war room."
|
| 26 |
+
- "Three themes. One env. Every claim hashed."
|
FINAL_SUBMIT/DATASET_CARD.md
CHANGED
|
@@ -23,11 +23,11 @@
|
|
| 23 |
## Static datasets
|
| 24 |
| Name | Size | Description | Path |
|
| 25 |
|------|------|-------------|------|
|
| 26 |
-
| EMDAT crisis library v2 | ~1500 events | historical disaster impact records | `
|
| 27 |
-
| Hand-curated 8 events | 8 events | Iran/Israel/Hormuz/Red-Sea/Suez/Taiwan/Thailand/Tohoku | `
|
| 28 |
| WTI crude time-series | 2,818 windows | DCOILWTICO from FRED | TFT training |
|
| 29 |
| Real company nodes | 40 nodes | TSMC/Samsung/Toyota etc with real coords | `data/companies_real.json` |
|
| 30 |
-
| Wordle dictionary | 102 words | 5-letter common words (tier-0 baseline) | `
|
| 31 |
| Wordle tier 1+ | +200/+150/+80 words | RLVE expansion tiers | `rlve_curriculum.py` |
|
| 32 |
| RAG corpus | 6,483 chunks | wiki_crisis 564 + sec_10k 5790 + policy 129 | `R5_GRANITE.json` |
|
| 33 |
| Conformal calibration NLLs | 5,696 (v2) / 16,000 (v3) | nonconformity scores | `conformal_*.json` |
|
|
|
|
| 23 |
## Static datasets
|
| 24 |
| Name | Size | Description | Path |
|
| 25 |
|------|------|-------------|------|
|
| 26 |
+
| EMDAT crisis library v2 | ~1500 events | historical disaster impact records | `versions/v4_arcadia_live/scenarios/` |
|
| 27 |
+
| Hand-curated 8 events | 8 events | Iran/Israel/Hormuz/Red-Sea/Suez/Taiwan/Thailand/Tohoku | `versions/v4_arcadia_live/realtime/crisis_library.py` |
|
| 28 |
| WTI crude time-series | 2,818 windows | DCOILWTICO from FRED | TFT training |
|
| 29 |
| Real company nodes | 40 nodes | TSMC/Samsung/Toyota etc with real coords | `data/companies_real.json` |
|
| 30 |
+
| Wordle dictionary | 102 words | 5-letter common words (tier-0 baseline) | `versions/v5_phoenix/wordle_env/env.py` |
|
| 31 |
| Wordle tier 1+ | +200/+150/+80 words | RLVE expansion tiers | `rlve_curriculum.py` |
|
| 32 |
| RAG corpus | 6,483 chunks | wiki_crisis 564 + sec_10k 5790 + policy 129 | `R5_GRANITE.json` |
|
| 33 |
| Conformal calibration NLLs | 5,696 (v2) / 16,000 (v3) | nonconformity scores | `conformal_*.json` |
|
FINAL_SUBMIT/ENV_CARD.md
CHANGED
|
@@ -45,7 +45,7 @@
|
|
| 45 |
- **hard_cascading_crisis** — 40 nodes, 60 days, $15M budget, cascading
|
| 46 |
|
| 47 |
## Wordle Companion Environment
|
| 48 |
-
- **Class**: `
|
| 49 |
- **Type**: Canonical RLVR mini-env
|
| 50 |
- **Action space**: `Discrete(102)` (102-word baseline) or restricted by curriculum tier
|
| 51 |
- **State**: 188-dim (rich encoding per `final_real_reinforce_wordle_v2.py`)
|
|
|
|
| 45 |
- **hard_cascading_crisis** — 40 nodes, 60 days, $15M budget, cascading
|
| 46 |
|
| 47 |
## Wordle Companion Environment
|
| 48 |
+
- **Class**: `versions.v5_phoenix.wordle_env.env`
|
| 49 |
- **Type**: Canonical RLVR mini-env
|
| 50 |
- **Action space**: `Discrete(102)` (102-word baseline) or restricted by curriculum tier
|
| 51 |
- **State**: 188-dim (rich encoding per `final_real_reinforce_wordle_v2.py`)
|
FINAL_SUBMIT/FEATURE_INVENTORY.md
CHANGED
|
@@ -8,9 +8,9 @@ Verification: every bullet point in the project plan mapped to file:line.
|
|
| 8 |
|
| 9 |
| Component | Previous | Now wired in |
|
| 10 |
|---|---|---|
|
| 11 |
-
| Chronos-Bolt-base | PARTIAL (verify only) | `
|
| 12 |
-
| TimesFM-2 | PARTIAL (verify only) | `
|
| 13 |
-
| TabPFN-v2 regressor | PARTIAL (verify only) | `
|
| 14 |
|
| 15 |
Closed Brent backtest gap from 6/8 to **8/8 within ±30%** (median rel err 3.3%).
|
| 16 |
See `tests/receipts/ensemble_brent_validation.json`.
|
|
@@ -22,12 +22,12 @@ See `tests/receipts/ensemble_brent_validation.json`.
|
|
| 22 |
| Bullet | Status | Path(s) | Note |
|
| 23 |
|---|---|---|---|
|
| 24 |
| supplymind-analyst:v1 | MISSING | — | only v2-v5 retained; v1 superseded |
|
| 25 |
-
| supplymind-analyst:v2-v5 | PRESENT | `rl/lora/Modelfile.v2:1-20`, `Modelfile.v3:1-20`, `Modelfile.v4:1-20`, `
|
| 26 |
-
| qwen25-14b-local Modelfile | PRESENT | `v3_arcadia/00_emergence/qwen25-14b.Modelfile:1-19` | Q4_K_M |
|
| 27 |
-
| qwen25-coder-local Modelfile | PRESENT | `v3_arcadia/00_emergence/qwen25-coder-14b.Modelfile:1-19` | JSON-mode |
|
| 28 |
-
| mistral-nemo-local Modelfile | PRESENT | `v3_arcadia/00_emergence/mistral-nemo.Modelfile:1-18` | num_ctx 32768 |
|
| 29 |
| deepseek-r1-local-q4 Modelfile | PRESENT | `docs/OLLAMA_FINE_TUNING_FINAL_UPGRADE.md` | Q4_K_M reference |
|
| 30 |
-
| 5 Modelfile files (rl/lora/*) | PRESENT | `rl/lora/Modelfile, .v2, .v3, .v4` + `
|
| 31 |
|
| 32 |
## A.2 Modelfile Crafting
|
| 33 |
|
|
@@ -54,7 +54,7 @@ See `tests/receipts/ensemble_brent_validation.json`.
|
|
| 54 |
|
| 55 |
| Bullet | Status | Path(s) | Note |
|
| 56 |
|---|---|---|---|
|
| 57 |
-
| `dpo_judge/*` directory | PRESENT | `
|
| 58 |
| `prepare_preference_data.py` | PRESENT | `dpo_judge/prepare_preference_data.py:1-50+` | DPO pair builder |
|
| 59 |
| `train_dpo_trl.py` | PRESENT | `dpo_judge/train_dpo_trl.py:1-50+` | TRL trainer |
|
| 60 |
| `train_dpo_roll.py` | PRESENT | `dpo_judge/train_dpo_roll.py:1-30+` | ROLL-integrated |
|
|
@@ -81,7 +81,7 @@ See `tests/receipts/ensemble_brent_validation.json`.
|
|
| 81 |
|---|---|---|---|
|
| 82 |
| Q4_K_M references | PRESENT | `mistral-nemo.Modelfile:1`, `qwen25-14b.Modelfile:1`, `qwen25-coder-14b.Modelfile:1` | all 3 specify q4km |
|
| 83 |
| `OLLAMA_MAX_LOADED_MODELS=1` | PRESENT | `docs/OLLAMA_FINE_TUNING_FINAL_UPGRADE.md` | VRAM discipline |
|
| 84 |
-
| `convert_bge_to_safetensors.py` | PRESENT | `v3_arcadia/00_emergence/convert_bge_to_safetensors.py:1-45` | CVE-2025-32434 workaround |
|
| 85 |
| 2GB safetensors output | PRESENT | `models/bge-m3/model.safetensors` | 2.2GB verified |
|
| 86 |
|
| 87 |
## B. 13 Foundation Models
|
|
@@ -106,14 +106,14 @@ See `tests/receipts/ensemble_brent_validation.json`.
|
|
| 106 |
|
| 107 |
| Script | Status | Path |
|
| 108 |
|---|---|---|
|
| 109 |
-
| `verify_qwen14b.py` | PRESENT | `v3_arcadia/00_emergence/verify_qwen14b.py` |
|
| 110 |
-
| `verify_mistral_nemo.py` | PRESENT | `v3_arcadia/00_emergence/verify_mistral_nemo.py` |
|
| 111 |
-
| `verify_qwen_coder.py` | PRESENT | `v3_arcadia/00_emergence/verify_qwen_coder.py` |
|
| 112 |
-
| `verify_qwen_vl.py` | PRESENT | `v3_arcadia/00_emergence/verify_qwen_vl.py` |
|
| 113 |
-
| `verify_tabpfn.py` | PRESENT | `v3_arcadia/00_emergence/verify_tabpfn.py` |
|
| 114 |
-
| `verify_timesfm.py` | PRESENT | `v3_arcadia/00_emergence/verify_timesfm.py` |
|
| 115 |
-
| `verify_embedders_chronos.py` | PRESENT | `v3_arcadia/00_emergence/verify_embedders_chronos.py` |
|
| 116 |
-
| `r1_qwen_vl_downstream.py` | PRESENT | `v3_arcadia/00_emergence/r1_qwen_vl_downstream.py` |
|
| 117 |
|
| 118 |
## C.1 Game-Engine Tasks & Action Space
|
| 119 |
|
|
@@ -203,19 +203,19 @@ See `tests/receipts/ensemble_brent_validation.json`.
|
|
| 203 |
|
| 204 |
| Component | Path | Purpose |
|
| 205 |
|---|---|---|
|
| 206 |
-
| Hormuz War Room orchestrator | `
|
| 207 |
-
| India 7-sector exposure | `
|
| 208 |
-
| Gulf 7-sector exposure | `
|
| 209 |
-
| Hormuz chokepoint graph | `
|
| 210 |
-
| OpenRouter 6-judge cross-check | `
|
| 211 |
| War-Room dashboard HTML | `server/static/hormuz_war_room.html` | dark-mode 6-panel UI |
|
| 212 |
| War-Room validation harness | `scripts/validate_war_room.py` | 8-event historical backtest |
|
| 213 |
-
| Ensemble Brent forecaster | `
|
| 214 |
| Ensemble Brent validator | `scripts/validate_ensemble_brent.py` | 8-event closed-form backtest |
|
| 215 |
| Master demo HTML | `server/static/master.html` | 9-card live integration page |
|
| 216 |
-
| RAP-XC weights | `
|
| 217 |
-
| Conformal weights | `
|
| 218 |
-
| HetGAT report | `
|
| 219 |
|
| 220 |
## API Keys (every key reaches a UI element)
|
| 221 |
|
|
|
|
| 8 |
|
| 9 |
| Component | Previous | Now wired in |
|
| 10 |
|---|---|---|
|
| 11 |
+
| Chronos-Bolt-base | PARTIAL (verify only) | `versions/v5_phoenix/forecast_v2/ensemble_brent.py:53-71` |
|
| 12 |
+
| TimesFM-2 | PARTIAL (verify only) | `versions/v5_phoenix/forecast_v2/ensemble_brent.py:74-99` |
|
| 13 |
+
| TabPFN-v2 regressor | PARTIAL (verify only) | `versions/v5_phoenix/forecast_v2/ensemble_brent.py:101-145` |
|
| 14 |
|
| 15 |
Closed Brent backtest gap from 6/8 to **8/8 within ±30%** (median relative error 3.3%).
|
| 16 |
See `tests/receipts/ensemble_brent_validation.json`.
|
|
|
|
| 22 |
| Bullet | Status | Path(s) | Note |
|
| 23 |
|---|---|---|---|
|
| 24 |
| supplymind-analyst:v1 | MISSING | — | only v2-v5 retained; v1 superseded |
|
| 25 |
+
| supplymind-analyst:v2-v5 | PRESENT | `rl/lora/Modelfile.v2:1-20`, `Modelfile.v3:1-20`, `Modelfile.v4:1-20`, `versions/v4_arcadia_live/features/Modelfile.analyst_v5:1-20` | 4 versions |
|
| 26 |
+
| qwen25-14b-local Modelfile | PRESENT | `versions/v3_arcadia/00_emergence/qwen25-14b.Modelfile:1-19` | Q4_K_M |
|
| 27 |
+
| qwen25-coder-local Modelfile | PRESENT | `versions/v3_arcadia/00_emergence/qwen25-coder-14b.Modelfile:1-19` | JSON-mode |
|
| 28 |
+
| mistral-nemo-local Modelfile | PRESENT | `versions/v3_arcadia/00_emergence/mistral-nemo.Modelfile:1-18` | num_ctx 32768 |
|
| 29 |
| deepseek-r1-local-q4 Modelfile | PRESENT | `docs/OLLAMA_FINE_TUNING_FINAL_UPGRADE.md` | Q4_K_M reference |
|
| 30 |
+
| 5 Modelfile files (rl/lora/*) | PRESENT | `rl/lora/Modelfile, .v2, .v3, .v4` + `versions/v4_arcadia_live/features/Modelfile.analyst_v5` | All 5 present |
|
| 31 |
|
| 32 |
## A.2 Modelfile Crafting
|
| 33 |
|
|
|
|
| 54 |
|
| 55 |
| Bullet | Status | Path(s) | Note |
|
| 56 |
|---|---|---|---|
|
| 57 |
+
| `dpo_judge/*` directory | PRESENT | `versions/v5_phoenix/roll_integration/dpo_judge/` | 6 files |
|
| 58 |
| `prepare_preference_data.py` | PRESENT | `dpo_judge/prepare_preference_data.py:1-50+` | DPO pair builder |
|
| 59 |
| `train_dpo_trl.py` | PRESENT | `dpo_judge/train_dpo_trl.py:1-50+` | TRL trainer |
|
| 60 |
| `train_dpo_roll.py` | PRESENT | `dpo_judge/train_dpo_roll.py:1-30+` | ROLL-integrated |
|
|
|
|
| 81 |
|---|---|---|---|
|
| 82 |
| Q4_K_M references | PRESENT | `mistral-nemo.Modelfile:1`, `qwen25-14b.Modelfile:1`, `qwen25-coder-14b.Modelfile:1` | all 3 specify q4km |
|
| 83 |
| `OLLAMA_MAX_LOADED_MODELS=1` | PRESENT | `docs/OLLAMA_FINE_TUNING_FINAL_UPGRADE.md` | VRAM discipline |
|
| 84 |
+
| `convert_bge_to_safetensors.py` | PRESENT | `versions/v3_arcadia/00_emergence/convert_bge_to_safetensors.py:1-45` | CVE-2025-32434 workaround |
|
| 85 |
| 2GB safetensors output | PRESENT | `models/bge-m3/model.safetensors` | 2.2GB verified |
|
| 86 |
|
| 87 |
## B. 13 Foundation Models
|
|
|
|
| 106 |
|
| 107 |
| Script | Status | Path |
|
| 108 |
|---|---|---|
|
| 109 |
+
| `verify_qwen14b.py` | PRESENT | `versions/v3_arcadia/00_emergence/verify_qwen14b.py` |
|
| 110 |
+
| `verify_mistral_nemo.py` | PRESENT | `versions/v3_arcadia/00_emergence/verify_mistral_nemo.py` |
|
| 111 |
+
| `verify_qwen_coder.py` | PRESENT | `versions/v3_arcadia/00_emergence/verify_qwen_coder.py` |
|
| 112 |
+
| `verify_qwen_vl.py` | PRESENT | `versions/v3_arcadia/00_emergence/verify_qwen_vl.py` |
|
| 113 |
+
| `verify_tabpfn.py` | PRESENT | `versions/v3_arcadia/00_emergence/verify_tabpfn.py` |
|
| 114 |
+
| `verify_timesfm.py` | PRESENT | `versions/v3_arcadia/00_emergence/verify_timesfm.py` |
|
| 115 |
+
| `verify_embedders_chronos.py` | PRESENT | `versions/v3_arcadia/00_emergence/verify_embedders_chronos.py` |
|
| 116 |
+
| `r1_qwen_vl_downstream.py` | PRESENT | `versions/v3_arcadia/00_emergence/r1_qwen_vl_downstream.py` |
|
| 117 |
|
| 118 |
## C.1 Game-Engine Tasks & Action Space
|
| 119 |
|
|
|
|
| 203 |
|
| 204 |
| Component | Path | Purpose |
|
| 205 |
|---|---|---|
|
| 206 |
+
| Hormuz War Room orchestrator | `versions/v4_arcadia_live/realtime/hormuz_war_room_router.py` | `/demo/hormuz-war-room` POST + UI route |
|
| 207 |
+
| India 7-sector exposure | `versions/v4_arcadia_live/scenarios/india_industry_exposure.py` | 7 cited sectors + deterministic scorer |
|
| 208 |
+
| Gulf 7-sector exposure | `versions/v4_arcadia_live/scenarios/gulf_industry_exposure.py` | 7 cited sectors + bypass-credit scorer |
|
| 209 |
+
| Hormuz chokepoint graph | `versions/v4_arcadia_live/scenarios/hormuz_chokepoint_graph.py` | 14 nodes + 18 edges + 5 IEA facts |
|
| 210 |
+
| OpenRouter 6-judge cross-check | `versions/v4_arcadia_live/realtime/openrouter_war_room_panel.py` | gpt-oss-120b, gemma, glm, minimax, nemotron, gemma-26b |
|
| 211 |
| War-Room dashboard HTML | `server/static/hormuz_war_room.html` | dark-mode 6-panel UI |
|
| 212 |
| War-Room validation harness | `scripts/validate_war_room.py` | 8-event historical backtest |
|
| 213 |
+
| Ensemble Brent forecaster | `versions/v5_phoenix/forecast_v2/ensemble_brent.py` | Chronos+TimesFM+TabPFN, 8/8 ±30% |
|
| 214 |
| Ensemble Brent validator | `scripts/validate_ensemble_brent.py` | 8-event closed-form backtest |
|
| 215 |
| Master demo HTML | `server/static/master.html` | 9-card live integration page |
|
| 216 |
+
| RAP-XC weights | `versions/v5_phoenix/experiments/rap_xc_v1/rapxc.pt` | 3.14M params, BC 5.62→0.23 |
|
| 217 |
+
| Conformal weights | `versions/v5_phoenix/action_v2/conformal_calibrated.pt` | α=0.1, coverage 0.9001 |
|
| 218 |
+
| HetGAT report | `versions/v5_phoenix/experiments/hetgat_v1/report.json` | +7.77/+12.15/+10.03% |
|
| 219 |
|
| 220 |
## API Keys (every key reaches a UI element)
|
| 221 |
|
FINAL_SUBMIT/FEATURE_INVENTORY_DI.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
|
| 3 |
Bullet-by-bullet status across the 6 sections D, E, F, G, H, I (~140 bullets). Each row links to a file or a JSON receipt that proves the claim.
|
| 4 |
|
| 5 |
-
**Note:** receipts named `R*_*.json` are mirrored from `v3_arcadia/results/` to `FINAL_SUBMIT/receipts/`.
|
| 6 |
|
| 7 |
---
|
| 8 |
|
|
@@ -86,7 +86,7 @@ Bullet-by-bullet status across the 4 sections D, E, F, G, H, I (~140 bullets). E
|
|
| 86 |
|---|---|---|---|
|
| 87 |
| 44 | Custom TFT 513K params on 3-target FRED | ✅ | `tft_real_metrics.json: params, test_mae_p50: {DCOILWTICO, PCOPPUSDM, PPICMM}` |
|
| 88 |
| 45 | Custom TFT 90K params on real WTI MAE $7.83/bbl | ✅ | `tft_v2_metrics.json: params, test_mae_p50.DCOILWTICO` |
|
| 89 |
-
| 46 | Chronos-Bolt 14-step quantile [0.1, 0.5, 0.9] | ✅ | `
|
| 90 |
| 47 | TimesFM-2 + synthesized quantile via residual regression | ✅ | `forecast_v2/ensemble_brent.py:74-99`, `R3_TIMESFM_QUANTILE.json` |
|
| 91 |
| 48 | Prophet weekly+yearly | ✅ | `R3_PAST_SELF.json` ensemble row (Prophet seasonality) |
|
| 92 |
| 49 | ARIMA(5,1,0) classical baseline | ✅ | `R3_PAST_SELF.json` ensemble row |
|
|
@@ -98,7 +98,7 @@ Bullet-by-bullet status across the 4 sections D, E, F, G, H, I (~140 bullets). E
|
|
| 98 |
| 55 | 8 FRED targets (WTI, copper, EUR/USD, JPY/USD, CNY/USD, KRW/USD, TWD/USD, PPICMM) | ✅ | `tft_v2_metrics.json: targets` (7 targets confirmed in train_tft_real.py: DCOILWTICO/PCOPPUSDM/DEXTAUS/DEXKOUS/DEXJPUS/DEXUSEU/DEXCHUS); 8th = PPICMM in tft_real_metrics |
|
| 99 |
| 56 | 3 horizons (7, 14, 28 days) | ⚠️ | `train_tft_real.py:39 HORIZON=14`; 7 and 28-day variants in `R3_PAST_SELF` rolling_backtest fields |
|
| 100 |
| 57 | PICP@80/90/95% calibration | ✅ | `R3_PAST_SELF.json: per_target_horizon.picp_*` |
|
| 101 |
-
| 58 | Per-horizon split-conformal (Foygel Barber 2022) | ✅ | `
|
| 102 |
| 59 | TimesFM-CP residual quantile regression | ✅ | `R3_TIMESFM_QUANTILE.json` + `R3_TimesFM_CP_WTI_dev95.receipt.yaml` |
|
| 103 |
| 60 | Heteroscedastic Ridge widths | ✅ | `R3_PAST_SELF.json: ridge_widths` |
|
| 104 |
| 61 | 2,883 business days (2015-2026) | ✅ | `tft_v2_metrics.json: train_size`, `rl/data/fred_cache.json` |
|
|
@@ -114,7 +114,7 @@ Bullet-by-bullet status across the 4 sections D, E, F, G, H, I (~140 bullets). E
|
|
| 114 |
|---|---|---|---|
|
| 115 |
| 63 | MC Dropout 50 forward passes | ✅ | `rl/uncertainty.py:1-50, n_passes=50`, `mc_dropout_v2.json` |
|
| 116 |
| 64 | Epistemic σ correlates accuracy (Q1=99.76%, Q4=55.92%) | ✅ | `mc_dropout_v2.json: reliability_full[bins]` |
|
| 117 |
-
| 65 | Conformal RL on Q-values (3 alpha 0.05/0.05/0.1) | ✅ | `
|
| 118 |
| 66 | Confidence-damped projection | ✅ | `rl/uncertainty.py: confidence_damping` + `crisis_library.py: damp_on_weak_match` |
|
| 119 |
| 67 | Beta-severity + Lognormal-duration MC | ✅ | `rl/surrogate/fast_monte_carlo.py: scenarios` |
|
| 120 |
| 68 | Numba JIT MC hotloop (10-50× speedup) | ✅ | `rl/surrogate/fast_monte_carlo.py: @numba.jit` |
|
|
@@ -147,7 +147,7 @@ Bullet-by-bullet status across the 4 sections D, E, F, G, H, I (~140 bullets). E
|
|
| 147 |
| 86 | 26 BEIR Wikipedia subset | ✅ | `R5_BEIR_MANUAL.json` |
|
| 148 |
| 87 | ChromaDB persistent at rl/rag/chroma_db/ | ✅ | dir present |
|
| 149 |
| 88 | Ollama nomic-embed-text (768d) | ✅ | `rl/rag/indexer.py:29-30 EMBEDDING_MODEL=nomic-embed-text` |
|
| 150 |
-
| 89 | mxbai-embed-large for crisis library | ✅ | `
|
| 151 |
| 90 | Corpus SHA-256 hash caching | ⚠️ | grep finds `corpus_hash` references in some scripts; not in indexer.py directly |
|
| 152 |
| 91 | min_score=0.60 | ✅ | `rl/rag/indexer.py:31 MIN_SCORE=0.60` |
|
| 153 |
| 92 | chunk_words=256, overlap=32, min=30 | ⚠️ | `indexer.py:32-33 chunk_words=300` (slightly different); overlap+min not in source |
|
|
@@ -207,8 +207,8 @@ Bullet-by-bullet status across the 4 sections D, E, F, G, H, I (~140 bullets). E
|
|
| 207 |
| 128 | 50 cached explanations | ✅ | cache implementation present |
|
| 208 |
| 129 | 3-4s per explanation on RTX 4080 | ✅ | latency profiled |
|
| 209 |
| 130 | Explainer stress test 50/50 pass | ✅ | `explainer_stress_v2.json: n_test=50, passed=50, pass_rate=1.0` (exact) |
|
| 210 |
-
| 131 | GCN edge attention PNG heatmaps | ✅ | `
|
| 211 |
-
| 132 | Provenance 5-tier trust classifier (regulatory/academic/reference/industry/uncertain) | ✅ | `
|
| 212 |
|
| 213 |
**Section I total: 13 ✅ + 1 ⚠️ = 14/14 = 100%**
|
| 214 |
|
|
|
|
| 2 |
|
| 3 |
Bullet-by-bullet status across the 6 sections D, E, F, G, H, I (~140 bullets). Each row links to a file or a JSON receipt that proves the claim.
|
| 4 |
|
| 5 |
+
**Note:** receipts named `R*_*.json` are mirrored from `versions/v3_arcadia/results/` to `FINAL_SUBMIT/receipts/`.
|
| 6 |
|
| 7 |
---
|
| 8 |
|
|
|
|
| 86 |
|---|---|---|---|
|
| 87 |
| 44 | Custom TFT 513K params on 3-target FRED | ✅ | `tft_real_metrics.json: params, test_mae_p50: {DCOILWTICO, PCOPPUSDM, PPICMM}` |
|
| 88 |
| 45 | Custom TFT 90K params on real WTI MAE $7.83/bbl | ✅ | `tft_v2_metrics.json: params, test_mae_p50.DCOILWTICO` |
|
| 89 |
+
| 46 | Chronos-Bolt 14-step quantile [0.1, 0.5, 0.9] | ✅ | `versions/v5_phoenix/forecast_v2/ensemble_brent.py:53-71` (pass-10), `R3_TIMESFM_QUANTILE.json` |
|
| 90 |
| 47 | TimesFM-2 + synthesized quantile via residual regression | ✅ | `forecast_v2/ensemble_brent.py:74-99`, `R3_TIMESFM_QUANTILE.json` |
|
| 91 |
| 48 | Prophet weekly+yearly | ✅ | `R3_PAST_SELF.json` ensemble row (Prophet seasonality) |
|
| 92 |
| 49 | ARIMA(5,1,0) classical baseline | ✅ | `R3_PAST_SELF.json` ensemble row |
|
|
|
|
| 98 |
| 55 | 8 FRED targets (WTI, copper, EUR/USD, JPY/USD, CNY/USD, KRW/USD, TWD/USD, PPICMM) | ✅ | `tft_v2_metrics.json: targets` (7 targets confirmed in train_tft_real.py: DCOILWTICO/PCOPPUSDM/DEXTAUS/DEXKOUS/DEXJPUS/DEXUSEU/DEXCHUS); 8th = PPICMM in tft_real_metrics |
|
| 99 |
| 56 | 3 horizons (7, 14, 28 days) | ⚠️ | `train_tft_real.py:39 HORIZON=14`; 7 and 28-day variants in `R3_PAST_SELF` rolling_backtest fields |
|
| 100 |
| 57 | PICP@80/90/95% calibration | ✅ | `R3_PAST_SELF.json: per_target_horizon.picp_*` |
|
| 101 |
+
| 58 | Per-horizon split-conformal (Foygel Barber 2022) | ✅ | `versions/v5_phoenix/receipts_v2/R3_TimesFM_CP_WTI_dev95.receipt.yaml` |
|
| 102 |
| 59 | TimesFM-CP residual quantile regression | ✅ | `R3_TIMESFM_QUANTILE.json` + `R3_TimesFM_CP_WTI_dev95.receipt.yaml` |
|
| 103 |
| 60 | Heteroscedastic Ridge widths | ✅ | `R3_PAST_SELF.json: ridge_widths` |
|
| 104 |
| 61 | 2,883 business days (2015-2026) | ✅ | `tft_v2_metrics.json: train_size`, `rl/data/fred_cache.json` |
|
|
|
|
| 114 |
|---|---|---|---|
|
| 115 |
| 63 | MC Dropout 50 forward passes | ✅ | `rl/uncertainty.py:1-50, n_passes=50`, `mc_dropout_v2.json` |
|
| 116 |
| 64 | Epistemic σ correlates accuracy (Q1=99.76%, Q4=55.92%) | ✅ | `mc_dropout_v2.json: reliability_full[bins]` |
|
| 117 |
+
| 65 | Conformal RL on Q-values (3 alpha 0.05/0.05/0.1) | ✅ | `versions/v4_arcadia_live/features/conformal_rl.py:1-50` + `versions/v5_phoenix/action_v2/conformal.py` |
|
| 118 |
| 66 | Confidence-damped projection | ✅ | `rl/uncertainty.py: confidence_damping` + `crisis_library.py: damp_on_weak_match` |
|
| 119 |
| 67 | Beta-severity + Lognormal-duration MC | ✅ | `rl/surrogate/fast_monte_carlo.py: scenarios` |
|
| 120 |
| 68 | Numba JIT MC hotloop (10-50× speedup) | ✅ | `rl/surrogate/fast_monte_carlo.py: @numba.jit` |
|
|
|
|
| 147 |
| 86 | 26 BEIR Wikipedia subset | ✅ | `R5_BEIR_MANUAL.json` |
|
| 148 |
| 87 | ChromaDB persistent at rl/rag/chroma_db/ | ✅ | dir present |
|
| 149 |
| 88 | Ollama nomic-embed-text (768d) | ✅ | `rl/rag/indexer.py:29-30 EMBEDDING_MODEL=nomic-embed-text` |
|
| 150 |
+
| 89 | mxbai-embed-large for crisis library | ✅ | `versions/v4_arcadia_live/scenarios/library_v2_search.py` (pass-6) |
|
| 151 |
| 90 | Corpus SHA-256 hash caching | ⚠️ | grep finds `corpus_hash` references in some scripts; not in indexer.py directly |
|
| 152 |
| 91 | min_score=0.60 | ✅ | `rl/rag/indexer.py:31 MIN_SCORE=0.60` |
|
| 153 |
| 92 | chunk_words=256, overlap=32, min=30 | ⚠️ | `indexer.py:32-33 chunk_words=300` (slightly different); overlap+min not in source |
|
|
|
|
| 207 |
| 128 | 50 cached explanations | ✅ | cache implementation present |
|
| 208 |
| 129 | 3-4s per explanation on RTX 4080 | ✅ | latency profiled |
|
| 209 |
| 130 | Explainer stress test 50/50 pass | ✅ | `explainer_stress_v2.json: n_test=50, passed=50, pass_rate=1.0` (exact) |
|
| 210 |
+
| 131 | GCN edge attention PNG heatmaps | ✅ | `versions/v4_arcadia_live/features/gcn_attention_viz.py` |
|
| 211 |
+
| 132 | Provenance 5-tier trust classifier (regulatory/academic/reference/industry/uncertain) | ✅ | `versions/v4_arcadia_live/features/rag_provenance.py:39-49` (5 tiers) |
|
| 212 |
|
| 213 |
**Section I total: 13 ✅ + 1 ⚠️ = 14/14 = 100%**
|
| 214 |
|
FINAL_SUBMIT/FEATURE_INVENTORY_JT.md
CHANGED
|
@@ -48,7 +48,7 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 48 |
|---|---|---|---|
|
| 49 |
| 21 | NSGA2 via pymoo | ✅ | `rl/pareto/*` (pymoo import) |
|
| 50 |
| 22 | 3 objectives (cost, resilience_loss, carbon) | ✅ | `pareto_results.json: objective_names` exact |
|
| 51 |
-
| 23 | Carbon factors per IMO/EPA/ICAO | ✅ | `
|
| 52 |
| 24 | Air 0.82 / Sea 0.013 / Sea express 0.026 / Rail 0.028 / Road 0.096 kg CO2/tonne-km | ✅ | constants in source |
|
| 53 |
| 25 | 20 mitigation plans tested | ⚠️ | `pareto_results.json: n_policies=5` (smaller run); 20-plan run may be older or in `pareto_frontier_v2.json` |
|
| 54 |
| 26 | 11 Pareto-frontier plans (55%) | ⚠️ | current receipts 2/5 and 3/5; 11/20 from older run |
|
|
@@ -73,11 +73,11 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 73 |
| 36 | GPU MC: 1 state → 100K with noise linspace(0.01-0.3) | ✅ | `rl/surrogate/gpu_monte_carlo.py` |
|
| 74 |
| 37 | 80ms for 100K scenarios | ✅ | profiled in module |
|
| 75 |
| 38 | p5/p50/p95/p99/cvar_10 outputs | ✅ | gpu_monte_carlo.py |
|
| 76 |
-
| 39 | Counterfactual digital twin (100 rollouts MC) | ✅ | `
|
| 77 |
| 40 | REVENUE_AT_RISK_USD: easy $200M / med $320M / hard $400M | ✅ | constants in twin.py |
|
| 78 |
| 41 | Severity multiplier 0.5 + 1.0 × clamp(severity, 0, 1) | ✅ | twin.py formula |
|
| 79 |
| 42 | TwinReport dataclass (median, p95, savings, CI95, savings_pct) | ✅ | twin.py |
|
| 80 |
-
| 43 | Receipt: $178.68M saved (48%) at sev=0.85, brent=$123, n=30 | ✅ | `
|
| 81 |
|
| 82 |
**M: 13/13 = 100%**
|
| 83 |
|
|
@@ -87,7 +87,7 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 87 |
|
| 88 |
| # | Bullet | Status | Evidence |
|
| 89 |
|---|---|---|---|
|
| 90 |
-
| 44 | NewsAPI (5 keyword queries, 7-day, 100 req/day) | ✅ | `
|
| 91 |
| 45 | GDELT 2.0 Doc API (15-min refresh, tone severity) | ✅ | `sources/gdelt.py` |
|
| 92 |
| 46 | USGS M4.5+ in last 24h, 6 region boxes | ✅ | `sources/usgs.py` |
|
| 93 |
| 47 | FRED Brent DCOILBRENTEU daily spot | ✅ | `sources/fred_brent.py` |
|
|
@@ -96,7 +96,7 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 96 |
| 50 | FRED severity max(\|DoD\|/5%, \|WoW\|/10%) capped 1.0 | ✅ | `fred_brent.py: compute_severity` |
|
| 97 |
| 51 | GDELT tone-derived severity | ✅ | `gdelt.py: tone_to_severity` |
|
| 98 |
| 52 | USGS magnitude-based severity | ✅ | `usgs.py: magnitude_to_severity` |
|
| 99 |
-
| 53 | SQLite events.db with full schema | ✅ | `
|
| 100 |
| 54 | 4 indices (source+hash, ts, region, type) | ✅ | `store.py: CREATE INDEX` × 4 |
|
| 101 |
| 55 | SHA-256 dedup hash 16 chars | ✅ | `store.py: hashlib.sha256(...).hexdigest()[:16]` |
|
| 102 |
| 56 | 24-hour dedup window | ✅ | `store.py: DEDUP_WINDOW_S = 86400` |
|
|
@@ -112,7 +112,7 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 112 |
|
| 113 |
| # | Bullet | Status | Evidence |
|
| 114 |
|---|---|---|---|
|
| 115 |
-
| 60 | 8 hand-curated real events (2022-2026) | ✅ | `
|
| 116 |
| 61 | 3-4 citations per event (Reuters/BBC/CNBC/FRED/IDF/DoD/UNCTAD/Lloyd's) | ✅ | each event.citations[] in JSON has 3-4 entries with publisher field |
|
| 117 |
| 62 | Curation policy ≥3 citations | ✅ | grep `citations` in v1 library |
|
| 118 |
| 63 | mxbai embedding mode | ✅ | `realtime/crisis_library.py` SentenceTransformer |
|
|
@@ -172,7 +172,7 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 172 |
| 110 | CatBoost (1500 iters, depth 8, GPU) | ✅ | DataCo training receipt |
|
| 173 |
| 111 | TabPFN-v2 classifier (zero-shot) | ✅ | `tabpfn_verify.json + tabpfn_risk_judge.py` |
|
| 174 |
| 112 | TabPFN-v2 regressor | ✅ | wired in pass-10 ensemble |
|
| 175 |
-
| 113 | TabPFN bagging | ✅ | `v3_arcadia/10_caramel/r2_tabpfn_bagging.py` + `R2_BENEFIT_FIX.json` |
|
| 176 |
| 114 | Stacking with Ridge meta-learner | ✅ | `R3_STACKING_V2.json` |
|
| 177 |
| 115 | 5-fold CV | ✅ | rolling-fold in stacking |
|
| 178 |
| 116 | OOF predictions | ✅ | `R3_STACKING_V2.json: oof_predictions` |
|
|
@@ -194,7 +194,7 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 194 |
|
| 195 |
| # | Bullet | Status | Evidence |
|
| 196 |
|---|---|---|---|
|
| 197 |
-
| 126 | Political risk GBR R²=0.994, MAE=0.0095 on 214 countries | ✅ | `
|
| 198 |
| 127 | Political risk LSTM | ✅ | alternate model in same module |
|
| 199 |
| 128 | Dependency MLP acc=97.45% on 144K | ✅ | `features/dependency_mlp.py + F11_*.json` |
|
| 200 |
| 129 | Financial impact Ridge R²=0.736, MAE=$26.04 | ✅ | `features/financial_ridge.py + F8_*.json` |
|
|
@@ -241,7 +241,7 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 241 |
| 152 | R6_MaskingAblation_easy_lift = 26.768% | ✅ | `R6_GETHSEMANE_MASKING_ABLATION.json` |
|
| 242 |
| 153 | R6_GCN_easy_MAE_vs_MLP = 48.025% | ✅ | `R6_PROVIDER_V2.json: easy.improvement_vs_mlp_pct = 48.025` exact |
|
| 243 |
| 154 | R6_AquaRegia_WTI_dev95 = 0.0238 | ✅ | `R6_AQUA_REGIA_V2.json` |
|
| 244 |
-
| 155 | R3_TimesFM_CP_WTI_dev95 = 0.050 | ✅ | `
|
| 245 |
| 156 | V4_SPOF_V2_F1 = 1.0 | ✅ | F23 receipt |
|
| 246 |
| 157 | V4_STACKING_V2_lift_vs_WV = 0.0045 | ✅ | `R3_STACKING_V2.json` |
|
| 247 |
| 158 | V4_Live_Brent_202604 = $123.28 | ✅ | live FRED fetch on 2026-04-21 |
|
|
@@ -253,15 +253,15 @@ Bullet-by-bullet status across J/K/L/M/N/O/P/Q/R/S/T (~200 bullets). Same legend
|
|
| 253 |
| 164 | V5_Arena_baseline_leaderboard = 6 baselines | ✅ | `R6_ALGO_COMPARISON.json` per_algorithm has 4 + 2 implicit = 6 |
|
| 254 |
| 165 | V5_Twin_savings_gt_zero = $178,684,200 | ✅ | twin receipt |
|
| 255 |
| 166 | V5_DPO_JUDGE_preference_pairs_built = 21 | ✅ | `dpo_judge/data/preference_pairs.jsonl: 21 lines` exact |
|
| 256 |
-
| 167 | V5_Skill_pack_shipped = 4 files | ✅ | `
|
| 257 |
| 168 | V5_Phoenix_tests_green = 15 passed | ✅ | phoenix smoke = 15 |
|
| 258 |
-
| 169 | SHA-256 stdout tracking | ✅ | `
|
| 259 |
| 170 | Hardware capture (CUDA detection) | ✅ | framework.py |
|
| 260 |
| 171 | Runtime tracking | ✅ | framework.py |
|
| 261 |
| 172 | 5 comparators (==, >=, <=, in_range, regex) | ✅ | framework.py |
|
| 262 |
-
| 173 | Tamper-evident SHA-256 + INDEX.json + INDEX.md auto-generated | ✅ | `
|
| 263 |
| 174 | Tiny YAML parser (no PyYAML dep) | ✅ | framework.py |
|
| 264 |
-
| 175 | 271-line framework.py | ✅ | `wc -l
|
| 265 |
|
| 266 |
**T: 28/28 = 100%**
|
| 267 |
|
|
|
|
| 48 |
|---|---|---|---|
|
| 49 |
| 21 | NSGA2 via pymoo | ✅ | `rl/pareto/*` (pymoo import) |
|
| 50 |
| 22 | 3 objectives (cost, resilience_loss, carbon) | ✅ | `pareto_results.json: objective_names` exact |
|
| 51 |
+
| 23 | Carbon factors per IMO/EPA/ICAO | ✅ | `versions/v4_arcadia_live/features/pareto_carbon.py` constants |
|
| 52 |
| 24 | Air 0.82 / Sea 0.013 / Sea express 0.026 / Rail 0.028 / Road 0.096 kg CO2/tonne-km | ✅ | constants in source |
|
| 53 |
| 25 | 20 mitigation plans tested | ⚠️ | `pareto_results.json: n_policies=5` (smaller run); 20-plan run may be older or in `pareto_frontier_v2.json` |
|
| 54 |
| 26 | 11 Pareto-frontier plans (55%) | ⚠️ | current receipts 2/5 and 3/5; 11/20 from older run |
|
|
|
|
| 73 |
| 36 | GPU MC: 1 state → 100K with noise linspace(0.01-0.3) | ✅ | `rl/surrogate/gpu_monte_carlo.py` |
|
| 74 |
| 37 | 80ms for 100K scenarios | ✅ | profiled in module |
|
| 75 |
| 38 | p5/p50/p95/p99/cvar_10 outputs | ✅ | gpu_monte_carlo.py |
|
| 76 |
+
| 39 | Counterfactual digital twin (100 rollouts MC) | ✅ | `versions/v5_phoenix/counterfactual_twin/twin.py` |
|
| 77 |
| 40 | REVENUE_AT_RISK_USD: easy $200M / med $320M / hard $400M | ✅ | constants in twin.py |
|
| 78 |
| 41 | Severity multiplier 0.5 + 1.0 × clamp(severity, 0, 1) | ✅ | twin.py formula |
|
| 79 |
| 42 | TwinReport dataclass (median, p95, savings, CI95, savings_pct) | ✅ | twin.py |
|
| 80 |
+
| 43 | Receipt: $178.68M saved (48%) at sev=0.85, brent=$123, n=30 | ✅ | `versions/v5_phoenix/receipts_v2/V5_Twin_savings_gt_zero.receipt.yaml` |
|
| 81 |
|
| 82 |
**M: 13/13 = 100%**
|
| 83 |
|
|
|
|
| 87 |
|
| 88 |
| # | Bullet | Status | Evidence |
|
| 89 |
|---|---|---|---|
|
| 90 |
+
| 44 | NewsAPI (5 keyword queries, 7-day, 100 req/day) | ✅ | `versions/v4_arcadia_live/realtime/sources/newsapi.py` |
|
| 91 |
| 45 | GDELT 2.0 Doc API (15-min refresh, tone severity) | ✅ | `sources/gdelt.py` |
|
| 92 |
| 46 | USGS M4.5+ in last 24h, 6 region boxes | ✅ | `sources/usgs.py` |
|
| 93 |
| 47 | FRED Brent DCOILBRENTEU daily spot | ✅ | `sources/fred_brent.py` |
|
|
|
|
| 96 |
| 50 | FRED severity max(\|DoD\|/5%, \|WoW\|/10%) capped 1.0 | ✅ | `fred_brent.py: compute_severity` |
|
| 97 |
| 51 | GDELT tone-derived severity | ✅ | `gdelt.py: tone_to_severity` |
|
| 98 |
| 52 | USGS magnitude-based severity | ✅ | `usgs.py: magnitude_to_severity` |
|
| 99 |
+
| 53 | SQLite events.db with full schema | ✅ | `versions/v4_arcadia_live/realtime/store.py: DB_PATH` |
|
| 100 |
| 54 | 4 indices (source+hash, ts, region, type) | ✅ | `store.py: CREATE INDEX` × 4 |
|
| 101 |
| 55 | SHA-256 dedup hash 16 chars | ✅ | `store.py: hashlib.sha256(...).hexdigest()[:16]` |
|
| 102 |
| 56 | 24-hour dedup window | ✅ | `store.py: DEDUP_WINDOW_S = 86400` |
|
|
|
|
| 112 |
|
| 113 |
| # | Bullet | Status | Evidence |
|
| 114 |
|---|---|---|---|
|
| 115 |
+
| 60 | 8 hand-curated real events (2022-2026) | ✅ | `versions/v4_arcadia_live/scenarios/iran_israel_hormuz_2024_2026.json: 8 events` exact |
|
| 116 |
| 61 | 3-4 citations per event (Reuters/BBC/CNBC/FRED/IDF/DoD/UNCTAD/Lloyd's) | ✅ | each event.citations[] in JSON has 3-4 entries with publisher field |
|
| 117 |
| 62 | Curation policy ≥3 citations | ✅ | grep `citations` in v1 library |
|
| 118 |
| 63 | mxbai embedding mode | ✅ | `realtime/crisis_library.py` SentenceTransformer |
|
|
|
|
| 172 |
| 110 | CatBoost (1500 iters, depth 8, GPU) | ✅ | DataCo training receipt |
|
| 173 |
| 111 | TabPFN-v2 classifier (zero-shot) | ✅ | `tabpfn_verify.json + tabpfn_risk_judge.py` |
|
| 174 |
| 112 | TabPFN-v2 regressor | ✅ | wired in pass-10 ensemble |
|
| 175 |
+
| 113 | TabPFN bagging | ✅ | `versions/v3_arcadia/10_caramel/r2_tabpfn_bagging.py` + `R2_BENEFIT_FIX.json` |
|
| 176 |
| 114 | Stacking with Ridge meta-learner | ✅ | `R3_STACKING_V2.json` |
|
| 177 |
| 115 | 5-fold CV | ✅ | rolling-fold in stacking |
|
| 178 |
| 116 | OOF predictions | ✅ | `R3_STACKING_V2.json: oof_predictions` |
|
|
|
|
| 194 |
|
| 195 |
| # | Bullet | Status | Evidence |
|
| 196 |
|---|---|---|---|
|
| 197 |
+
| 126 | Political risk GBR R²=0.994, MAE=0.0095 on 214 countries | ✅ | `versions/v4_arcadia_live/features/political_risk.py + receipts/F12_*.json` |
|
| 198 |
| 127 | Political risk LSTM | ✅ | alternate model in same module |
|
| 199 |
| 128 | Dependency MLP acc=97.45% on 144K | ✅ | `features/dependency_mlp.py + F11_*.json` |
|
| 200 |
| 129 | Financial impact Ridge R²=0.736, MAE=$26.04 | ✅ | `features/financial_ridge.py + F8_*.json` |
|
|
|
|
| 241 |
| 152 | R6_MaskingAblation_easy_lift = 26.768% | ✅ | `R6_GETHSEMANE_MASKING_ABLATION.json` |
|
| 242 |
| 153 | R6_GCN_easy_MAE_vs_MLP = 48.025% | ✅ | `R6_PROVIDER_V2.json: easy.improvement_vs_mlp_pct = 48.025` exact |
|
| 243 |
| 154 | R6_AquaRegia_WTI_dev95 = 0.0238 | ✅ | `R6_AQUA_REGIA_V2.json` |
|
| 244 |
+
| 155 | R3_TimesFM_CP_WTI_dev95 = 0.050 | ✅ | `versions/v5_phoenix/receipts_v2/R3_TimesFM_CP_WTI_dev95.receipt.yaml` |
|
| 245 |
| 156 | V4_SPOF_V2_F1 = 1.0 | ✅ | F23 receipt |
|
| 246 |
| 157 | V4_STACKING_V2_lift_vs_WV = 0.0045 | ✅ | `R3_STACKING_V2.json` |
|
| 247 |
| 158 | V4_Live_Brent_202604 = $123.28 | ✅ | live FRED fetch on 2026-04-21 |
|
|
|
|
| 253 |
| 164 | V5_Arena_baseline_leaderboard = 6 baselines | ✅ | `R6_ALGO_COMPARISON.json` per_algorithm has 4 + 2 implicit = 6 |
|
| 254 |
| 165 | V5_Twin_savings_gt_zero = $178,684,200 | ✅ | twin receipt |
|
| 255 |
| 166 | V5_DPO_JUDGE_preference_pairs_built = 21 | ✅ | `dpo_judge/data/preference_pairs.jsonl: 21 lines` exact |
|
| 256 |
+
| 167 | V5_Skill_pack_shipped = 4 files | ✅ | `versions/v5_phoenix/supplymind_skills/*` 4+ skills |
|
| 257 |
| 168 | V5_Phoenix_tests_green = 15 passed | ✅ | phoenix smoke = 15 |
|
| 258 |
+
| 169 | SHA-256 stdout tracking | ✅ | `versions/v5_phoenix/receipts_v2/framework.py` |
|
| 259 |
| 170 | Hardware capture (CUDA detection) | ✅ | framework.py |
|
| 260 |
| 171 | Runtime tracking | ✅ | framework.py |
|
| 261 |
| 172 | 5 comparators (==, >=, <=, in_range, regex) | ✅ | framework.py |
|
| 262 |
+
| 173 | Tamper-evident SHA-256 + INDEX.json + INDEX.md auto-generated | ✅ | `versions/v5_phoenix/receipts_v2/INDEX.{json,md}` |
|
| 263 |
| 174 | Tiny YAML parser (no PyYAML dep) | ✅ | framework.py |
|
| 264 |
+
| 175 | 271-line framework.py | ✅ | `wc -l versions/v5_phoenix/receipts_v2/framework.py` |
|
| 265 |
|
| 266 |
**T: 28/28 = 100%**
|
| 267 |
|
FINAL_SUBMIT/FEATURE_INVENTORY_UBB.md
CHANGED
|
@@ -8,7 +8,7 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 8 |
|
| 9 |
| # | Bullet | Status | Evidence |
|
| 10 |
|---|---|---|---|
|
| 11 |
-
| 1 | Karpathy-pattern overnight loop | ✅ | `
|
| 12 |
| 2 | LLM hypothesis generation (Qwen-14B local or Claude) | ✅ | `hypothesis_engine.py` |
|
| 13 |
| 3 | Mutable `candidate_train.py` with safe-to-modify markers | ✅ | `autoresearch_fixed/candidate_train.py` |
|
| 14 |
| 4 | Frozen `program.md` (immutable) | ✅ | `autoresearch_fixed/program.md` |
|
|
@@ -42,11 +42,11 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 42 |
|
| 43 |
| # | Bullet | Status | Evidence |
|
| 44 |
|---|---|---|---|
|
| 45 |
-
| 26 | Counterfactual digital twin 100 rollouts MC | ✅ | `
|
| 46 |
| 27 | Arena leaderboard 6 baselines pre-seeded | ✅ | `arena/leaderboard.json: n_baselines=6` exact |
|
| 47 |
| 28 | MaskablePPO #1 mean=2.209 CI95=[2.178,2.239] | ✅ | `arena/leaderboard.json: rows[0] = MaskablePPO-v3 (ours), overall_reward_mean=2.209, overall_ci95=[2.178,2.239]` EXACT |
|
| 48 |
| 29 | runner.py with TaskResult + ArenaResult dataclasses | ✅ | `arena/runner.py` |
|
| 49 |
-
| 30 | 3 Claude Code skills (benchmark-runner, autoresearch-experiment, live-demo-orchestrator) | ✅ | `
|
| 50 |
| 31 | plugin.json v1.0.0 manifest | ✅ | `supplymind_skills/plugin.json` |
|
| 51 |
| 32 | Replay cache 8 events frozen | ✅ | `realtime_v5/replay_cache_latest.json: n_events=8` exact |
|
| 52 |
| 33 | replay_cache_latest.json + timestamped snapshot | ✅ | dir contains both |
|
|
@@ -56,7 +56,7 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 56 |
| 37 | DPO 21 pairs Qwen-2.5-3B LoRA r=8 | ✅ | `dpo_judge/data/preference_pairs.jsonl` 21 lines |
|
| 57 |
| 38 | TRL fallback for ROLL fragility | ✅ | `dpo_judge/train_dpo_trl.py` |
|
| 58 |
| 39 | Two upstream PRs ready (Meta OpenEnv + Alibaba ROLL) | ✅ | `docs/PHOENIX_PUSH_REPORT.md` |
|
| 59 |
-
| 40 | build_pr_branch.sh | ✅ | `
|
| 60 |
| 41 | Phoenix isolation (v3+v4 untouched) + copy-before-edit + .venv-roll/ | ✅ | docs note |
|
| 61 |
| 42 | phoenix_app.py mounts /arena /twin /replay + /phoenix/status | ✅ | `phoenix_app.py` + server/app.py mount |
|
| 62 |
|
|
@@ -74,7 +74,7 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 74 |
| 46 | Non-root appuser UID 1000 | ✅ | Dockerfile RUN useradd |
|
| 75 |
| 47 | HEALTHCHECK curl /health every 30s | ✅ | Dockerfile HEALTHCHECK |
|
| 76 |
| 48 | uvicorn server.app:app entry | ✅ | Dockerfile CMD |
|
| 77 |
-
| 49 | HF Space at huggingface.co/spaces/Shaurya-Noodle/Supplymind | ✅ | `DEPLOY_HF_SPACE.md` |
|
| 78 |
| 50 | ONNX <5e-5 roundtrip 4 models | ✅ | `onnx_roundtrip.json` (BC 3.05e-5, CQL 5.22e-8, IQL 3.05e-5, TD3+BC 1.53e-5) all <5e-5 |
|
| 79 |
| 51 | .gitignore excludes 159GB models/ | ✅ | `.gitignore` line `models/` |
|
| 80 |
| 52 | <2GB container size | ✅ | DEPLOY_HF_SPACE notes |
|
|
@@ -153,7 +153,7 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 153 |
| 107 | 15+ disruption taxonomy | ✅ | `server/data/disruptions.json` |
|
| 154 |
| 108 | 15 leading indicators with correlations | ✅ | `rl/leading_indicators.py` |
|
| 155 |
| 109 | FRED state[400:407] features | ✅ | `rl/state_builder.py` slice |
|
| 156 |
-
| 110 | 40+ industry citations DATA_SOURCES.md | ✅ | `DATA_SOURCES.md` |
|
| 157 |
|
| 158 |
**Y: 21 ✅ + 2 ⚠️ = 23/23 = 100%**
|
| 159 |
|
|
@@ -164,42 +164,42 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 164 |
| # | Doc | Status | Path |
|
| 165 |
|---|---|---|---|
|
| 166 |
| 111 | README.md (40KB) | ✅ | repo root |
|
| 167 |
-
| 112 | SUPPLYMIND_BLUEPRINT.md (81KB) | ✅ | repo root |
|
| 168 |
-
| 113 | ALIENWARE_KICKOFF.md (53KB) | ✅ | repo root |
|
| 169 |
-
| 114 | AUDIT_PLAN.md (22KB) | ✅ | repo root |
|
| 170 |
| 115 | MODEL_CARD.md (19KB) | ✅ | repo root |
|
| 171 |
-
| 116 | PYTORCH_STORY.md | ✅ | repo root |
|
| 172 |
-
| 117 | BENCHMARKS_VS_PUBLIC.md | ✅ | repo root |
|
| 173 |
-
| 118 | DATA_SOURCES.md | ✅ | repo root |
|
| 174 |
-
| 119 | EXTERNAL_CREDIBILITY.md | ✅ | repo root |
|
| 175 |
-
| 120 | JUDGES.md | ✅ | repo root |
|
| 176 |
-
| 121 | FINAL_DEMO.md | ✅ | repo root |
|
| 177 |
-
| 122 | DEMO_SCRIPT.md | ✅ | repo root |
|
| 178 |
-
| 123 | DEPLOY_HF_SPACE.md | ✅ | repo root |
|
| 179 |
-
| 124 | EXECUTIVE_SUMMARY.md | ✅ | repo root |
|
| 180 |
-
| 125 | RESULTS.md | ✅ | repo root |
|
| 181 |
| 126 | CLONE_AND_STUDY.md | ✅ | docs/ |
|
| 182 |
| 127 | FINAL_AUDIT_REPORT.md | ✅ | docs/ |
|
| 183 |
| 128 | MULTI_TURN_GRPO_ROADMAP.md | ✅ | docs/ |
|
| 184 |
| 129 | LIVE_DEMO_HORMUZ.md | ✅ | demo/ or root |
|
| 185 |
-
| 130 | PREPRINT.md | ✅ |
|
| 186 |
-
| 131 | PREPRINT_V5.md | ✅ |
|
| 187 |
| 132 | PITCH_DECK.md | ✅ | demo/ |
|
| 188 |
-
| 133 | PITCH_DECK_V5.md | ✅ |
|
| 189 |
| 134 | DEMO_VIDEO_SCRIPT.md | ✅ | demo/ |
|
| 190 |
-
| 135 | DEMO_VIDEO_SCRIPT_V5.md | ✅ |
|
| 191 |
-
| 136 | JUDGES_V5.md | ✅ |
|
| 192 |
| 137 | CHECKLIST.md | ✅ | demo/ |
|
| 193 |
| 138 | LANDING_PAGE.md | ✅ | demo/ |
|
| 194 |
| 139 | EXTERNAL_OUTREACH.md | ✅ | demo/ |
|
| 195 |
| 140 | SECRETS_ROTATION.md | ✅ | docs/ |
|
| 196 |
-
| 141 | PHOENIX_PLAN_V5.md | ✅ |
|
| 197 |
-
| 142 | PHOENIX_COMPLETION_AUDIT.md | ✅ |
|
| 198 |
-
| 143 | PHOENIX_PUSH_REPORT.md | ✅ |
|
| 199 |
| 144 | HF_DEPLOY_V4.md | ✅ | docs/ |
|
| 200 |
| 145 | R4_RUBRIC_CHALLENGE.md | ✅ | challenges/ |
|
| 201 |
| 146 | FAILURE_TABLE.md | ✅ | repo root |
|
| 202 |
-
| 147 | 12 Sleep Token album-track stages (00_emergence → 95_arcadia) | ✅ | `v3_arcadia/` 12 dirs verified exact |
|
| 203 |
| 148 | Notebook 01_environment_quickstart | ✅ | `notebooks/01_environment_quickstart.ipynb` |
|
| 204 |
| 149 | Notebook 02_training_your_own_agent | ✅ | `notebooks/02_*.ipynb` |
|
| 205 |
| 150 | Notebook 03_reproducing_benchmarks | ✅ | same |
|
|
@@ -216,16 +216,16 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 216 |
|
| 217 |
| # | Bullet | Status | Evidence |
|
| 218 |
|---|---|---|---|
|
| 219 |
-
| 155 | Hero result card 10-number 2×5 grid | ✅ | `make_hero_card.py` + `v3_arcadia/plots/hero_*.png` |
|
| 220 |
| 156 | make_hero_card.py | ✅ | repo |
|
| 221 |
-
| 157 | Caramel reliability calibration curves | ✅ | `v3_arcadia/plots/r2_caramel_*` |
|
| 222 |
-
| 158 | R4 dangerous 7 plots | ✅ | `v3_arcadia/plots/r4_dangerous_*.png` |
|
| 223 |
-
| 159 | R5 granite 5 plots | ✅ | `v3_arcadia/plots/r5_granite_*.png` |
|
| 224 |
-
| 160 | R6 gethsemane 3 plots | ✅ | `v3_arcadia/plots/r6_gethsemane_*.png` |
|
| 225 |
-
| 161 | R3 past-self 2 plots | ✅ | `v3_arcadia/plots/r3_past_self_*.png` |
|
| 226 |
-
| 162 | R6 provider network graph | ✅ | `v3_arcadia/plots/r6_provider_graph.png` |
|
| 227 |
-
| 163 | R6 euclidian bootstrap CI bands | ✅ | `v3_arcadia/plots/r6_euclidian_*.png` |
|
| 228 |
-
| 164 | R6 aqua-regia coverage plot | ✅ | `v3_arcadia/plots/r6_aqua_regia_coverage.png` |
|
| 229 |
| 165 | GCN attention heatmaps 3 graphs | ✅ | `rl/gnn/attention.py` outputs PNG |
|
| 230 |
| 166 | Streamlit dashboard 12 panels | ✅ | `dashboard/streamlit_app.py` |
|
| 231 |
| 167 | Pareto 3D scatter Plotly | ✅ | `rl/pareto/visualize.py` |
|
|
@@ -246,7 +246,7 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 246 |
| 173 | Two-pass DeepSeek extraction (free CoT → Qwen JSON parse) | ✅ | `R4_DANGEROUS_V2.json: extractor field` 100% parse rate |
|
| 247 |
| 174 | Phoenix isolation guarantee 3 layers | ✅ | `PHOENIX_COMPLETION_AUDIT.md` |
|
| 248 |
| 175 | Copy-before-edit discipline | ✅ | `PHOENIX_PUSH_REPORT.md` |
|
| 249 |
-
| 176 | Tiny YAML parser (no PyYAML) | ✅ | `
|
| 250 |
| 177 | _corpus_hash SHA-256 embedding cache invalidation | ✅ | `crisis_library.py: corpus_hash` |
|
| 251 |
| 178 | Token-bucket OpenRouter limiter | ✅ | `openrouter_client.py: per_minute=18` |
|
| 252 |
| 179 | .openrouter_cache/ API caching | ✅ | dir exists |
|
|
@@ -263,7 +263,7 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 263 |
| 190 | Honest fallback labeling | ✅ | `data_source_flags.live_pipeline = "deterministic_rubric_fallback"` |
|
| 264 |
| 191 | judge_source field | ✅ | `_call_ollama_judge: judge_source = ollama:<model>` |
|
| 265 |
| 192 | Scenario JSON ingestion_note | ✅ | crisis library schema |
|
| 266 |
-
| 193 | 4-minute judge path designed | ✅ | `JUDGES.md` |
|
| 267 |
| 194 | 30-second receipt verification target | ✅ | `framework.py` design |
|
| 268 |
| 195 | Sleep Token thesis "Even in Arcadia, disruptions happen" | ✅ | tagline in docs |
|
| 269 |
|
|
@@ -301,7 +301,7 @@ Bullet-by-bullet status across U/V/W/X/Y/Z/AA/BB (~180 bullets). Same legend as
|
|
| 301 |
| 6 baselines pre-seeded | `n_baselines=6` ✅ EXACT |
|
| 302 |
| Replay cache 8 events | `replay_cache_latest.json: n_events=8` ✅ EXACT |
|
| 303 |
| Phoenix INDEX 20 receipts | `INDEX.json: list[20]` ✅ EXACT |
|
| 304 |
-
| 12 Sleep Token stages | `v3_arcadia/` 12 dirs ✅ EXACT |
|
| 305 |
| 125 .md docs | `find *.md` 125 ✅ |
|
| 306 |
| 4 ONNX <5e-5 | onnx_roundtrip ✅ |
|
| 307 |
| Token-bucket 18 req/min, 950 req/day | `openrouter_client.py` ✅ EXACT |
|
|
|
|
| 8 |
|
| 9 |
| # | Bullet | Status | Evidence |
|
| 10 |
|---|---|---|---|
|
| 11 |
+
| 1 | Karpathy-pattern overnight loop | ✅ | `versions/v5_phoenix/autoresearch_fixed/orchestrator.py` |
|
| 12 |
| 2 | LLM hypothesis generation (Qwen-14B local or Claude) | ✅ | `hypothesis_engine.py` |
|
| 13 |
| 3 | Mutable `candidate_train.py` with safe-to-modify markers | ✅ | `autoresearch_fixed/candidate_train.py` |
|
| 14 |
| 4 | Frozen `program.md` (immutable) | ✅ | `autoresearch_fixed/program.md` |
|
|
|
|
| 42 |
|
| 43 |
| # | Bullet | Status | Evidence |
|
| 44 |
|---|---|---|---|
|
| 45 |
+
| 26 | Counterfactual digital twin 100 rollouts MC | ✅ | `versions/v5_phoenix/counterfactual_twin/twin.py` |
|
| 46 |
| 27 | Arena leaderboard 6 baselines pre-seeded | ✅ | `arena/leaderboard.json: n_baselines=6` exact |
|
| 47 |
| 28 | MaskablePPO #1 mean=2.209 CI95=[2.178,2.239] | ✅ | `arena/leaderboard.json: rows[0] = MaskablePPO-v3 (ours), overall_reward_mean=2.209, overall_ci95=[2.178,2.239]` EXACT |
|
| 48 |
| 29 | runner.py with TaskResult + ArenaResult dataclasses | ✅ | `arena/runner.py` |
|
| 49 |
+
| 30 | 3 Claude Code skills (benchmark-runner, autoresearch-experiment, live-demo-orchestrator) | ✅ | `versions/v5_phoenix/supplymind_skills/` 3 dirs |
|
| 50 |
| 31 | plugin.json v1.0.0 manifest | ✅ | `supplymind_skills/plugin.json` |
|
| 51 |
| 32 | Replay cache 8 events frozen | ✅ | `realtime_v5/replay_cache_latest.json: n_events=8` exact |
|
| 52 |
| 33 | replay_cache_latest.json + timestamped snapshot | ✅ | dir contains both |
|
|
|
|
| 56 |
| 37 | DPO 21 pairs Qwen-2.5-3B LoRA r=8 | ✅ | `dpo_judge/data/preference_pairs.jsonl` 21 lines |
|
| 57 |
| 38 | TRL fallback for ROLL fragility | ✅ | `dpo_judge/train_dpo_trl.py` |
|
| 58 |
| 39 | Two upstream PRs ready (Meta OpenEnv + Alibaba ROLL) | ✅ | `docs/PHOENIX_PUSH_REPORT.md` |
|
| 59 |
+
| 40 | build_pr_branch.sh | ✅ | `versions/v5_phoenix/build_pr_branch.sh` |
|
| 60 |
| 41 | Phoenix isolation (v3+v4 untouched) + copy-before-edit + .venv-roll/ | ✅ | docs note |
|
| 61 |
| 42 | phoenix_app.py mounts /arena /twin /replay + /phoenix/status | ✅ | `phoenix_app.py` + server/app.py mount |
|
| 62 |
|
|
|
|
| 74 |
| 46 | Non-root appuser UID 1000 | ✅ | Dockerfile RUN useradd |
|
| 75 |
| 47 | HEALTHCHECK curl /health every 30s | ✅ | Dockerfile HEALTHCHECK |
|
| 76 |
| 48 | uvicorn server.app:app entry | ✅ | Dockerfile CMD |
|
| 77 |
+
| 49 | HF Space at huggingface.co/spaces/Shaurya-Noodle/Supplymind | ✅ | `docs/v3/DEPLOY_HF_SPACE.md` |
|
| 78 |
| 50 | ONNX <5e-5 roundtrip 4 models | ✅ | `onnx_roundtrip.json` (BC 3.05e-5, CQL 5.22e-8, IQL 3.05e-5, TD3+BC 1.53e-5) all <5e-5 |
|
| 79 |
| 51 | .gitignore excludes 159GB models/ | ✅ | `.gitignore` line `models/` |
|
| 80 |
| 52 | <2GB container size | ✅ | DEPLOY_HF_SPACE notes |
|
|
|
|
| 153 |
| 107 | 15+ disruption taxonomy | ✅ | `server/data/disruptions.json` |
|
| 154 |
| 108 | 15 leading indicators with correlations | ✅ | `rl/leading_indicators.py` |
|
| 155 |
| 109 | FRED state[400:407] features | ✅ | `rl/state_builder.py` slice |
|
| 156 |
+
| 110 | 40+ industry citations in DATA_SOURCES.md | ✅ | `docs/core/DATA_SOURCES.md` |
|
| 157 |
|
| 158 |
**Y: 21 ✅ + 2 ⚠️ = 23/23 = 100%**
|
| 159 |
|
|
|
|
| 164 |
| # | Doc | Status | Path |
|
| 165 |
|---|---|---|---|
|
| 166 |
| 111 | README.md (40KB) | ✅ | repo root |
|
| 167 |
+
| 112 | SUPPLYMIND_BLUEPRINT.md (81KB) | ✅ | docs/core/ |
|
| 168 |
+
| 113 | ALIENWARE_KICKOFF.md (53KB) | ✅ | docs/dev_log/ |
|
| 169 |
+
| 114 | AUDIT_PLAN.md (22KB) | ✅ | docs/v4/ |
|
| 170 |
| 115 | MODEL_CARD.md (19KB) | ✅ | repo root |
|
| 171 |
+
| 116 | PYTORCH_STORY.md | ✅ | docs/v3/ |
|
| 172 |
+
| 117 | BENCHMARKS_VS_PUBLIC.md | ✅ | docs/v3/ |
|
| 173 |
+
| 118 | DATA_SOURCES.md | ✅ | docs/core/ |
|
| 174 |
+
| 119 | EXTERNAL_CREDIBILITY.md | ✅ | docs/core/ |
|
| 175 |
+
| 120 | JUDGES.md | ✅ | docs/v4/ |
|
| 176 |
+
| 121 | FINAL_DEMO.md | ✅ | docs/v3/ |
|
| 177 |
+
| 122 | DEMO_SCRIPT.md | ✅ | docs/v3/ |
|
| 178 |
+
| 123 | DEPLOY_HF_SPACE.md | ✅ | docs/v3/ |
|
| 179 |
+
| 124 | EXECUTIVE_SUMMARY.md | ✅ | docs/v3/ |
|
| 180 |
+
| 125 | RESULTS.md | ✅ | docs/v3/ |
|
| 181 |
| 126 | CLONE_AND_STUDY.md | ✅ | docs/ |
|
| 182 |
| 127 | FINAL_AUDIT_REPORT.md | ✅ | docs/ |
|
| 183 |
| 128 | MULTI_TURN_GRPO_ROADMAP.md | ✅ | docs/ |
|
| 184 |
| 129 | LIVE_DEMO_HORMUZ.md | ✅ | demo/ or root |
|
| 185 |
+
| 130 | PREPRINT.md | ✅ | versions/v4_arcadia_live/docs/ |
|
| 186 |
+
| 131 | PREPRINT_V5.md | ✅ | versions/v5_phoenix/docs/ |
|
| 187 |
| 132 | PITCH_DECK.md | ✅ | demo/ |
|
| 188 |
+
| 133 | PITCH_DECK_V5.md | ✅ | versions/v5_phoenix/docs/ |
|
| 189 |
| 134 | DEMO_VIDEO_SCRIPT.md | ✅ | demo/ |
|
| 190 |
+
| 135 | DEMO_VIDEO_SCRIPT_V5.md | ✅ | versions/v5_phoenix/docs/ |
|
| 191 |
+
| 136 | JUDGES_V5.md | ✅ | versions/v5_phoenix/docs/ |
|
| 192 |
| 137 | CHECKLIST.md | ✅ | demo/ |
|
| 193 |
| 138 | LANDING_PAGE.md | ✅ | demo/ |
|
| 194 |
| 139 | EXTERNAL_OUTREACH.md | ✅ | demo/ |
|
| 195 |
| 140 | SECRETS_ROTATION.md | ✅ | docs/ |
|
| 196 |
+
| 141 | PHOENIX_PLAN_V5.md | ✅ | versions/v4_arcadia_live/docs/ |
|
| 197 |
+
| 142 | PHOENIX_COMPLETION_AUDIT.md | ✅ | versions/v5_phoenix/docs/ |
|
| 198 |
+
| 143 | PHOENIX_PUSH_REPORT.md | ✅ | versions/v5_phoenix/docs/ |
|
| 199 |
| 144 | HF_DEPLOY_V4.md | ✅ | docs/ |
|
| 200 |
| 145 | R4_RUBRIC_CHALLENGE.md | ✅ | challenges/ |
|
| 201 |
| 146 | FAILURE_TABLE.md | ✅ | repo root |
|
| 202 |
+
| 147 | 12 Sleep Token album-track stages (00_emergence → 95_arcadia) | ✅ | `versions/v3_arcadia/` 12 dirs verified exact |
|
| 203 |
| 148 | Notebook 01_environment_quickstart | ✅ | `notebooks/01_environment_quickstart.ipynb` |
|
| 204 |
| 149 | Notebook 02_training_your_own_agent | ✅ | `notebooks/02_*.ipynb` |
|
| 205 |
| 150 | Notebook 03_reproducing_benchmarks | ✅ | same |
|
|
|
|
| 216 |
|
| 217 |
| # | Bullet | Status | Evidence |
|
| 218 |
|---|---|---|---|
|
| 219 |
+
| 155 | Hero result card 10-number 2×5 grid | ✅ | `make_hero_card.py` + `versions/v3_arcadia/plots/hero_*.png` |
|
| 220 |
| 156 | make_hero_card.py | ✅ | repo |
|
| 221 |
+
| 157 | Caramel reliability calibration curves | ✅ | `versions/v3_arcadia/plots/r2_caramel_*` |
|
| 222 |
+
| 158 | R4 dangerous 7 plots | ✅ | `versions/v3_arcadia/plots/r4_dangerous_*.png` |
|
| 223 |
+
| 159 | R5 granite 5 plots | ✅ | `versions/v3_arcadia/plots/r5_granite_*.png` |
|
| 224 |
+
| 160 | R6 gethsemane 3 plots | ✅ | `versions/v3_arcadia/plots/r6_gethsemane_*.png` |
|
| 225 |
+
| 161 | R3 past-self 2 plots | ✅ | `versions/v3_arcadia/plots/r3_past_self_*.png` |
|
| 226 |
+
| 162 | R6 provider network graph | ✅ | `versions/v3_arcadia/plots/r6_provider_graph.png` |
|
| 227 |
+
| 163 | R6 euclidian bootstrap CI bands | ✅ | `versions/v3_arcadia/plots/r6_euclidian_*.png` |
|
| 228 |
+
| 164 | R6 aqua-regia coverage plot | ✅ | `versions/v3_arcadia/plots/r6_aqua_regia_coverage.png` |
|
| 229 |
| 165 | GCN attention heatmaps 3 graphs | ✅ | `rl/gnn/attention.py` outputs PNG |
|
| 230 |
| 166 | Streamlit dashboard 12 panels | ✅ | `dashboard/streamlit_app.py` |
|
| 231 |
| 167 | Pareto 3D scatter Plotly | ✅ | `rl/pareto/visualize.py` |
|
|
|
|
| 246 |
| 173 | Two-pass DeepSeek extraction (free CoT → Qwen JSON parse) | ✅ | `R4_DANGEROUS_V2.json: extractor field` 100% parse rate |
|
| 247 |
| 174 | Phoenix isolation guarantee 3 layers | ✅ | `PHOENIX_COMPLETION_AUDIT.md` |
|
| 248 |
| 175 | Copy-before-edit discipline | ✅ | `PHOENIX_PUSH_REPORT.md` |
|
| 249 |
+
| 176 | Tiny YAML parser (no PyYAML) | ✅ | `versions/v5_phoenix/receipts_v2/framework.py` |
|
| 250 |
| 177 | _corpus_hash SHA-256 embedding cache invalidation | ✅ | `crisis_library.py: corpus_hash` |
|
| 251 |
| 178 | Token-bucket OpenRouter limiter | ✅ | `openrouter_client.py: per_minute=18` |
|
| 252 |
| 179 | .openrouter_cache/ API caching | ✅ | dir exists |
|
|
|
|
| 263 |
| 190 | Honest fallback labeling | ✅ | `data_source_flags.live_pipeline = "deterministic_rubric_fallback"` |
|
| 264 |
| 191 | judge_source field | ✅ | `_call_ollama_judge: judge_source = ollama:<model>` |
|
| 265 |
| 192 | Scenario JSON ingestion_note | ✅ | crisis library schema |
|
| 266 |
+
| 193 | 4-minute judge path designed | ✅ | `docs/v4/JUDGES.md` |
|
| 267 |
| 194 | 30-second receipt verification target | ✅ | `framework.py` design |
|
| 268 |
| 195 | Sleep Token thesis "Even in Arcadia, disruptions happen" | ✅ | tagline in docs |
|
| 269 |
|
|
|
|
| 301 |
| 6 baselines pre-seeded | `n_baselines=6` ✅ EXACT |
|
| 302 |
| Replay cache 8 events | `replay_cache_latest.json: n_events=8` ✅ EXACT |
|
| 303 |
| Phoenix INDEX 20 receipts | `INDEX.json: list[20]` ✅ EXACT |
|
| 304 |
+
| 12 Sleep Token stages | `versions/v3_arcadia/` 12 dirs ✅ EXACT |
|
| 305 |
| 125 .md docs | `find *.md` 125 ✅ |
|
| 306 |
| 4 ONNX <5e-5 | onnx_roundtrip ✅ |
|
| 307 |
| Token-bucket 18 req/min, 950 req/day | `openrouter_client.py` ✅ EXACT |
|
FINAL_SUBMIT/HACKATHON_README.md
CHANGED
|
@@ -260,7 +260,7 @@ python scripts/generate_hackathon_plots.py # all 7 plots
|
|
| 260 |
## 4.5 · RLVE adaptive curriculum + RLVR dual-verifier (per RL guide §22-23 + §31-33)
|
| 261 |
|
| 262 |
### RLVE adaptive curriculum controller
|
| 263 |
-
File: [`
|
| 264 |
|
| 265 |
Per RL guide §22-23 (procedural verifiable environments — beyond static RLVR):
|
| 266 |
- **Tier 0** = 100 most-common 5-letter words (baseline)
|
|
@@ -274,7 +274,7 @@ Per RL guide §22-23 (procedural verifiable environments — beyond static RLVR)
|
|
| 274 |
Smoke (200 episodes, synthetic policy): 4 tier shifts captured. Receipt: [`rlve_curriculum_smoke.json`](receipts/rlve_curriculum_smoke.json).
|
| 275 |
|
| 276 |
### RLVR dual-verifier framework
|
| 277 |
-
File: [`
|
| 278 |
|
| 279 |
Per RL guide §31-33 (rule-based verifiers brittle, model-based exploitable):
|
| 280 |
- **Rule layer**: word ∈ dict, format valid, exact green/yellow scoring
|
|
|
|
| 260 |
## 4.5 · RLVE adaptive curriculum + RLVR dual-verifier (per RL guide §22-23 + §31-33)
|
| 261 |
|
| 262 |
### RLVE adaptive curriculum controller
|
| 263 |
+
File: [`versions/v5_phoenix/wordle_env/rlve_curriculum.py`](../versions/v5_phoenix/wordle_env/rlve_curriculum.py)
|
| 264 |
|
| 265 |
Per RL guide §22-23 (procedural verifiable environments — beyond static RLVR):
|
| 266 |
- **Tier 0** = 100 most-common 5-letter words (baseline)
|
|
|
|
| 274 |
Smoke (200 episodes, synthetic policy): 4 tier shifts captured. Receipt: [`rlve_curriculum_smoke.json`](receipts/rlve_curriculum_smoke.json).
|
| 275 |
|
| 276 |
### RLVR dual-verifier framework
|
| 277 |
+
File: [`versions/v5_phoenix/wordle_env/dual_verifier.py`](../versions/v5_phoenix/wordle_env/dual_verifier.py)
|
| 278 |
|
| 279 |
Per RL guide §31-33 (rule-based verifiers brittle, model-based exploitable):
|
| 280 |
- **Rule layer**: word ∈ dict, format valid, exact green/yellow scoring
|
FINAL_SUBMIT/JUDGE_FAQ_30.md
CHANGED
|
@@ -69,7 +69,7 @@ Global Fishing Watch — vessel positions feed into Hormuz/Red Sea route-disrupt
|
|
| 69 |
Compatibility with PEFT 0.19 + Unsloth current pin. `requirements.txt` locks the stack.
|
| 70 |
|
| 71 |
### 23. "Reward function code?"
|
| 72 |
-
`server/engine/rewards.py` (SupplyMind 7-component) + `
|
| 73 |
|
| 74 |
### 24. "Forecasting baselines?"
|
| 75 |
TFT (513,534 steps), TFT-v2, BigTFT (90,602), TimesFM zero-shot, Granite, Stacking-v3, Brent ensemble. NOAA 60.07% accuracy. Receipts each.
|
|
|
|
| 69 |
Compatibility with PEFT 0.19 + Unsloth current pin. `requirements.txt` locks the stack.
|
| 70 |
|
| 71 |
### 23. "Reward function code?"
|
| 72 |
+
`server/engine/rewards.py` (SupplyMind 7-component) + `versions/v5_phoenix/wordle_env/env.py` (Wordle 6-component). Both verifiable.
|
| 73 |
|
| 74 |
### 24. "Forecasting baselines?"
|
| 75 |
TFT (513,534 steps), TFT-v2, BigTFT (90,602), TimesFM zero-shot, Granite, Stacking-v3, Brent ensemble. NOAA 60.07% accuracy. Receipts each.
|
FINAL_SUBMIT/JUDGE_OBJECTION_HANDBOOK.md
CHANGED
|
@@ -26,7 +26,7 @@ Format: **Q** = the objection · **A** = the rebuttal · **Receipt** = the on-di
|
|
| 26 |
|
| 27 |
**Q5**. "Why supply chain over a research-paper-novel domain?"
|
| 28 |
**A**. Picked deliberately: (1) supply-chain has crisp economic verifiers (Brent prices, agency-published loss bands), (2) it has rich partial observability (20 live data sources), (3) it's professionally relevant (Theme 3 explicit fit). And it's underexplored in OpenEnv community — most submissions are grid worlds or web tasks.
|
| 29 |
-
**Receipt**: `DATA_SOURCES.md` lists 20 sources with their epistemic role.
|
| 30 |
|
| 31 |
---
|
| 32 |
|
|
|
|
| 26 |
|
| 27 |
**Q5**. "Why supply chain over a research-paper-novel domain?"
|
| 28 |
**A**. Picked deliberately: (1) supply-chain has crisp economic verifiers (Brent prices, agency-published loss bands), (2) it has rich partial observability (20 live data sources), (3) it's professionally relevant (Theme 3 explicit fit). And it's underexplored in OpenEnv community — most submissions are grid worlds or web tasks.
|
| 29 |
+
**Receipt**: `docs/core/DATA_SOURCES.md` lists 20 sources with their epistemic role.
|
| 30 |
|
| 31 |
---
|
| 32 |
|
FINAL_SUBMIT/MASTER_FEATURE_USECASE_MAP_250.md
CHANGED
|
@@ -18,22 +18,22 @@ Sections A through BB + RL/RLVR/RLVE knowledge alignment.
|
|
| 18 |
| A7 | 30-step episode horizon | `server/supply_environment.py` | bounded RL episode | reset config |
|
| 19 |
| A8 | $5M-$15M budget tasks | `data/disruptions.json` | sparse-reward shaping | task manifest |
|
| 20 |
| A9 | Real-world coordinates (TSMC, Samsung) | `data/companies_real.json` | Theme #3 Professional Tasks | n_real_nodes=40 |
|
| 21 |
-
| A10 | 8 v1 events crisis library | `
|
| 22 |
-
| A11 | Wordle RLVR mini-env | `
|
| 23 |
-
| A12 | RLVE adaptive curriculum | `
|
| 24 |
|
| 25 |
## B. REWARD ENGINEERING — 14 features
|
| 26 |
| # | Feature | File | Use case | Receipt |
|
| 27 |
|---|---------|------|----------|---------|
|
| 28 |
| B1 | 7-component shaped reward | `server/engine/rewards.py` | RL guide §7 multi-component | rewards module |
|
| 29 |
-
| B2 | Format gate | `
|
| 30 |
-
| B3 | Dictionary gate | `
|
| 31 |
-
| B4 | Timeout penalty | `
|
| 32 |
-
| B5 | Solve bonus + step-count bonus | `
|
| 33 |
| B6 | Green credit | env.py | per-letter success | ablation: -0.459 if removed |
|
| 34 |
| B7 | Yellow credit | env.py | partial info credit | ablation: small drop if removed |
|
| 35 |
| B8 | Process supervision (line-level) | `scripts/final_validation_bundle.py:process_supervision` | RL guide §9 Lightman 2023 | `process_supervision.json` (var amp 2735×) |
|
| 36 |
-
| B9 | Dual-verifier composite | `
|
| 37 |
| B10 | Disagreement alarm | `dual_verifier.py:DISAGREEMENT_THRESHOLD` | §43 anti-hacking monitoring | rolling alarm 0.30 |
|
| 38 |
| B11 | Ablation receipts (5 components) | `final_validation_bundle.py` | leave-one-out analysis | `ablation_matrix.json` |
|
| 39 |
| B12 | Variance reduction baseline | `final_real_reinforce_wordle.py` | Williams 1992 REINFORCE | running_baseline EMA |
|
|
@@ -98,8 +98,8 @@ Receipt: `adversarial_20_attack_gauntlet.json` (sha 082a3c57…)
|
|
| 98 |
## G. RAG / RETRIEVAL — 8 features
|
| 99 |
| # | Feature | File | Use case | Receipt |
|
| 100 |
|---|---------|------|----------|---------|
|
| 101 |
-
| G1 | FAISS index | `
|
| 102 |
-
| G2 | BGE-rerank | `
|
| 103 |
| G3 | Crisis library 8 events | `realtime/crisis_library.py` | analog retrieval | RAG against Iran/Hormuz/Suez |
|
| 104 |
| G4 | NewsAPI live ingest | `realtime/news_ingest.py` | recent events | event store |
|
| 105 |
| G5 | GDELT integration | `realtime/gdelt.py` | global events | event store |
|
|
|
|
| 18 |
| A7 | 30-step episode horizon | `server/supply_environment.py` | bounded RL episode | reset config |
|
| 19 |
| A8 | $5M-$15M budget tasks | `data/disruptions.json` | sparse-reward shaping | task manifest |
|
| 20 |
| A9 | Real-world coordinates (TSMC, Samsung) | `data/companies_real.json` | Theme #3 Professional Tasks | n_real_nodes=40 |
|
| 21 |
+
| A10 | 8 v1 events crisis library | `versions/v4_arcadia_live/realtime/crisis_library.py` | RAG analog retrieval | 8 events indexed |
|
| 22 |
+
| A11 | Wordle RLVR mini-env | `versions/v5_phoenix/wordle_env/env.py` | canonical hackathon flow | `wordle_real_reinforce_curve.json` |
|
| 23 |
+
| A12 | RLVE adaptive curriculum | `versions/v5_phoenix/wordle_env/rlve_curriculum.py` | §22-23 Procaccia-style | `rlve_curriculum_smoke.json` (4 tier shifts) |
|
| 24 |
|
| 25 |
## B. REWARD ENGINEERING — 14 features
|
| 26 |
| # | Feature | File | Use case | Receipt |
|
| 27 |
|---|---------|------|----------|---------|
|
| 28 |
| B1 | 7-component shaped reward | `server/engine/rewards.py` | RL guide §7 multi-component | rewards module |
|
| 29 |
+
| B2 | Format gate | `versions/v5_phoenix/wordle_env/env.py` | reject malformed actions | adv-20 attacks 1-9 blocked |
|
| 30 |
+
| B3 | Dictionary gate | `versions/v5_phoenix/wordle_env/env.py` | reject non-dict words | adv-20 attack #10 blocked |
|
| 31 |
+
| B4 | Timeout penalty | `versions/v5_phoenix/wordle_env/env.py` | RL guide §15 timeout monitor | -0.2 if 6 guesses fail |
|
| 32 |
+
| B5 | Solve bonus + step-count bonus | `versions/v5_phoenix/wordle_env/env.py` | richer signal | ablation_matrix.json |
|
| 33 |
| B6 | Green credit | env.py | per-letter success | ablation: -0.459 if removed |
|
| 34 |
| B7 | Yellow credit | env.py | partial info credit | ablation: small drop if removed |
|
| 35 |
| B8 | Process supervision (line-level) | `scripts/final_validation_bundle.py:process_supervision` | RL guide §9 Lightman 2023 | `process_supervision.json` (var amp 2735×) |
|
| 36 |
+
| B9 | Dual-verifier composite | `versions/v5_phoenix/wordle_env/dual_verifier.py` | rule × (0.5 + 0.5×model) | `dual_verifier_smoke.json` |
|
| 37 |
| B10 | Disagreement alarm | `dual_verifier.py:DISAGREEMENT_THRESHOLD` | §43 anti-hacking monitoring | rolling alarm 0.30 |
|
| 38 |
| B11 | Ablation receipts (5 components) | `final_validation_bundle.py` | leave-one-out analysis | `ablation_matrix.json` |
|
| 39 |
| B12 | Variance reduction baseline | `final_real_reinforce_wordle.py` | Williams 1992 REINFORCE | running_baseline EMA |
|
|
|
|
| 98 |
## G. RAG / RETRIEVAL — 8 features
|
| 99 |
| # | Feature | File | Use case | Receipt |
|
| 100 |
|---|---------|------|----------|---------|
|
| 101 |
+
| G1 | FAISS index | `versions/v4_arcadia_live/realtime/store.py` | top-K retrieval | store.query_recent |
|
| 102 |
+
| G2 | BGE-rerank | `versions/v4_arcadia_live/realtime/rerank.py` | quality boost | falls back gracefully on Win |
|
| 103 |
| G3 | Crisis library 8 events | `realtime/crisis_library.py` | analog retrieval | RAG against Iran/Hormuz/Suez |
|
| 104 |
| G4 | NewsAPI live ingest | `realtime/news_ingest.py` | recent events | event store |
|
| 105 |
| G5 | GDELT integration | `realtime/gdelt.py` | global events | event store |
|
FINAL_SUBMIT/README.md
CHANGED
|
@@ -37,13 +37,13 @@ http://127.0.0.1:8000/demo/master
|
|
| 37 |
| Conformal action coverage | **0.9001** | `tests/receipts/conformal_calibration.json` |
|
| 38 |
| Cross-corpus α (frontier 6, v2 EMDAT) | **0.5436** | `tests/receipts/cross_corpus_alpha.json` |
|
| 39 |
| 12-frontier panel α (R4 corpus) | **0.5669** | `tests/receipts/panel_agreement_R4.json` |
|
| 40 |
-
| HetGAT vs v1 GCN MAE | **+7.77 / +12.15 / +10.03 %** | `
|
| 41 |
-
| RAP-XC training loss | BC **5.62 → 0.23** | `
|
| 42 |
| RAP-XC parameters | **3,137,049** | same |
|
| 43 |
| Tohoku 2011 replicated | **$276 B vs $235 B published (+18%)** | `tests/receipts/platinum_counterfactual.json` |
|
| 44 |
-
| Live data sources | **20** | `
|
| 45 |
-
| Crisis library | **1,500 EMDAT events** | `
|
| 46 |
-
| Foundation models verified | **13/13** | `v3_arcadia/00_emergence/verify_*.py` |
|
| 47 |
| Custom Ollama analyst models | **5 (v1→v5)** | `rl/lora/Modelfile.v[2-4]`, `Modelfile.analyst_v5` |
|
| 48 |
| LoRA training pairs | **225** | `rl/data/lora_training_data.json` |
|
| 49 |
| DPO preference pairs | **21** | `dpo_judge/data/preference_pairs.jsonl` |
|
|
@@ -98,13 +98,13 @@ Detailed: see [REPRODUCE.md](REPRODUCE.md).
|
|
| 98 |
| Section | Where |
|
| 99 |
|---|---|
|
| 100 |
| Game engine (OpenEnv) | `server/app.py`, `server/supply_environment.py`, `server/engine/` |
|
| 101 |
-
| 9 RL agents | `
|
| 102 |
-
| 13 foundation models | `models/`, `v3_arcadia/00_emergence/verify_*.py` |
|
| 103 |
-
| Custom Ollama analyst models | `rl/lora/Modelfile.v[2-4]`, `
|
| 104 |
-
| LoRA + DPO + GRPO training | `rl/lora/`, `
|
| 105 |
-
| 1500-event crisis library | `
|
| 106 |
-
| 4-method counterfactual | `
|
| 107 |
-
| Hormuz War Room | `
|
| 108 |
| Master demo page | `server/static/master.html` |
|
| 109 |
| Receipts | `tests/receipts/*.json` |
|
| 110 |
|
|
|
|
| 37 |
| Conformal action coverage | **0.9001** | `tests/receipts/conformal_calibration.json` |
|
| 38 |
| Cross-corpus α (frontier 6, v2 EMDAT) | **0.5436** | `tests/receipts/cross_corpus_alpha.json` |
|
| 39 |
| 12-frontier panel α (R4 corpus) | **0.5669** | `tests/receipts/panel_agreement_R4.json` |
|
| 40 |
+
| HetGAT vs v1 GCN MAE | **+7.77 / +12.15 / +10.03 %** | `versions/v5_phoenix/experiments/hetgat_v1/report.json` |
|
| 41 |
+
| RAP-XC training loss | BC **5.62 → 0.23** | `versions/v5_phoenix/experiments/rap_xc_v1/rapxc.pt` |
|
| 42 |
| RAP-XC parameters | **3,137,049** | same |
|
| 43 |
| Tohoku 2011 replicated | **$276 B vs $235 B published (+18%)** | `tests/receipts/platinum_counterfactual.json` |
|
| 44 |
+
| Live data sources | **20** | `versions/v4_arcadia_live/realtime/orchestrator_v2.py` |
|
| 45 |
+
| Crisis library | **1,500 EMDAT events** | `versions/v4_arcadia_live/scenarios/crisis_library_v2.json` |
|
| 46 |
+
| Foundation models verified | **13/13** | `versions/v3_arcadia/00_emergence/verify_*.py` |
|
| 47 |
| Custom Ollama analyst models | **5 (v1→v5)** | `rl/lora/Modelfile.v[2-4]`, `Modelfile.analyst_v5` |
|
| 48 |
| LoRA training pairs | **225** | `rl/data/lora_training_data.json` |
|
| 49 |
| DPO preference pairs | **21** | `dpo_judge/data/preference_pairs.jsonl` |
|
|
|
|
| 98 |
| Section | Where |
|
| 99 |
|---|---|
|
| 100 |
| Game engine (OpenEnv) | `server/app.py`, `server/supply_environment.py`, `server/engine/` |
|
| 101 |
+
| 9 RL agents | `versions/v5_phoenix/arena/`, `versions/v5_phoenix/rap_xc/` |
|
| 102 |
+
| 13 foundation models | `models/`, `versions/v3_arcadia/00_emergence/verify_*.py` |
|
| 103 |
+
| Custom Ollama analyst models | `rl/lora/Modelfile.v[2-4]`, `versions/v4_arcadia_live/features/Modelfile.analyst_v5` |
|
| 104 |
+
| LoRA + DPO + GRPO training | `rl/lora/`, `versions/v5_phoenix/roll_integration/dpo_judge/` |
|
| 105 |
+
| 1500-event crisis library | `versions/v4_arcadia_live/scenarios/crisis_library_v2.{json,faiss}` |
|
| 106 |
+
| 4-method counterfactual | `versions/v5_phoenix/counterfactual_v2/platinum.py` |
|
| 107 |
+
| Hormuz War Room | `versions/v4_arcadia_live/realtime/hormuz_war_room_router.py`, `server/static/hormuz_war_room.html` |
|
| 108 |
| Master demo page | `server/static/master.html` |
|
| 109 |
| Receipts | `tests/receipts/*.json` |
|
| 110 |
|
FINAL_SUBMIT/RELIANCE_HORMUZ_DEEP_DIVE.md
CHANGED
|
@@ -119,4 +119,4 @@ The remaining seven subsidiaries collectively account for ~15% of impact.
|
|
| 119 |
|
| 120 |
**Key insight**: highest *score* node (RIIL pipelines 0.916) has lowest *absolute* impact (₹35 Cr) because it is a small-revenue stub. Highest *absolute* impact (Jamnagar ₹12,194 Cr) has lower score (0.824) but the largest revenue base. **Score and absolute impact tell different stories — both matter.**
|
| 121 |
|
| 122 |
-
Receipt: deterministic (no LLM in scoring). Numbers anchor to RIL FY24 Integrated Annual Report. Reproduce: `python
|
|
|
|
| 119 |
|
| 120 |
**Key insight**: highest *score* node (RIIL pipelines 0.916) has lowest *absolute* impact (₹35 Cr) because it is a small-revenue stub. Highest *absolute* impact (Jamnagar ₹12,194 Cr) has lower score (0.824) but the largest revenue base. **Score and absolute impact tell different stories — both matter.**
|
| 121 |
|
| 122 |
+
Receipt: deterministic (no LLM in scoring). Numbers anchor to RIL FY24 Integrated Annual Report. Reproduce: `python versions/v4_arcadia_live/scenarios/reliance_industries_exposure.py`.
|
FINAL_SUBMIT/REPRODUCE.md
CHANGED
|
@@ -70,10 +70,10 @@ python scripts/bootstrap_leaderboard.py
|
|
| 70 |
python scripts/ollama_v5_vs_frontier.py
|
| 71 |
|
| 72 |
# 7. HetGAT all 3 graphs (~30 min on RTX 4080)
|
| 73 |
-
python -m
|
| 74 |
|
| 75 |
# 8. RAP-XC training on harvested transitions (~20 sec on RTX 4080)
|
| 76 |
-
python -c "from
|
| 77 |
```
|
| 78 |
|
| 79 |
All produce JSON receipts at `tests/receipts/*.json`.
|
|
|
|
| 70 |
python scripts/ollama_v5_vs_frontier.py
|
| 71 |
|
| 72 |
# 7. HetGAT all 3 graphs (~30 min on RTX 4080)
|
| 73 |
+
python -m versions.v5_phoenix.gnn_v2.train_hetgat --graph all --epochs 200
|
| 74 |
|
| 75 |
# 8. RAP-XC training on harvested transitions (~20 sec on RTX 4080)
|
| 76 |
+
python -c "from versions.v5_phoenix.rap_xc.train import train_rapxc; train_rapxc()"
|
| 77 |
```
|
| 78 |
|
| 79 |
All produce JSON receipts at `tests/receipts/*.json`.
|
FINAL_SUBMIT/REPRODUCE_ONE_BASH.sh
CHANGED
|
@@ -14,11 +14,11 @@ echo "Repo: $(pwd)"
|
|
| 14 |
echo
|
| 15 |
|
| 16 |
echo "[1/8] Wordle env + RLVE curriculum smoke ..."
|
| 17 |
-
python -m
|
| 18 |
|
| 19 |
echo
|
| 20 |
echo "[2/8] Dual verifier smoke ..."
|
| 21 |
-
python -m
|
| 22 |
|
| 23 |
echo
|
| 24 |
echo "[3/8] OpenEnv MCP compliance ..."
|
|
@@ -38,7 +38,7 @@ python scripts/final_validation_bundle.py
|
|
| 38 |
|
| 39 |
echo
|
| 40 |
echo "[7/8] Wordle GRPO baseline (heuristic policy receipt) ..."
|
| 41 |
-
python -m
|
| 42 |
|
| 43 |
echo
|
| 44 |
echo "[8/8] Receipt index ..."
|
|
|
|
| 14 |
echo
|
| 15 |
|
| 16 |
echo "[1/8] Wordle env + RLVE curriculum smoke ..."
|
| 17 |
+
python -m versions.v5_phoenix.wordle_env.rlve_curriculum
|
| 18 |
|
| 19 |
echo
|
| 20 |
echo "[2/8] Dual verifier smoke ..."
|
| 21 |
+
python -m versions.v5_phoenix.wordle_env.dual_verifier
|
| 22 |
|
| 23 |
echo
|
| 24 |
echo "[3/8] OpenEnv MCP compliance ..."
|
|
|
|
| 38 |
|
| 39 |
echo
|
| 40 |
echo "[7/8] Wordle GRPO baseline (heuristic policy receipt) ..."
|
| 41 |
+
python -m versions.v5_phoenix.wordle_env.train_grpo --steps 50 || true
|
| 42 |
|
| 43 |
echo
|
| 44 |
echo "[8/8] Receipt index ..."
|
FINAL_SUBMIT/RL_GUIDE_59POINT_ALIGNMENT.md
CHANGED
|
@@ -25,7 +25,7 @@ Each of the 59 hackathon-guide points → which file implements it → which rec
|
|
| 25 |
**Receipt**: HF Space-ready manifest.
|
| 26 |
|
| 27 |
## §6. Easy first
|
| 28 |
-
**File**: `
|
| 29 |
**Receipt**: `rlve_curriculum_smoke.json` — 4 tier shifts.
|
| 30 |
|
| 31 |
## §7. Reward design carefully
|
|
@@ -81,7 +81,7 @@ Each of the 59 hackathon-guide points → which file implements it → which rec
|
|
| 81 |
**Receipt**: `lora_unsloth_train.json`.
|
| 82 |
|
| 83 |
## §31–33. Dual verifier
|
| 84 |
-
**File**: `
|
| 85 |
**Receipt**: `dual_verifier_smoke.json` — BRAID FP caught.
|
| 86 |
|
| 87 |
## §34–37. Curriculum band 0.45–0.75
|
|
|
|
| 25 |
**Receipt**: HF Space-ready manifest.
|
| 26 |
|
| 27 |
## §6. Easy first
|
| 28 |
+
**File**: `versions/v5_phoenix/wordle_env/rlve_curriculum.py` Tier-0
|
| 29 |
**Receipt**: `rlve_curriculum_smoke.json` — 4 tier shifts.
|
| 30 |
|
| 31 |
## §7. Reward design carefully
|
|
|
|
| 81 |
**Receipt**: `lora_unsloth_train.json`.
|
| 82 |
|
| 83 |
## §31–33. Dual verifier
|
| 84 |
+
**File**: `versions/v5_phoenix/wordle_env/dual_verifier.py`
|
| 85 |
**Receipt**: `dual_verifier_smoke.json` — BRAID FP caught.
|
| 86 |
|
| 87 |
## §34–37. Curriculum band 0.45–0.75
|
FINAL_SUBMIT/docker/Dockerfile.api
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# System deps for sentence-transformers, faiss, torch
|
| 6 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 7 |
+
build-essential git curl ca-certificates libgomp1 \
|
| 8 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
+
|
| 10 |
+
COPY requirements.txt /app/requirements.txt
|
| 11 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
+
|
| 13 |
+
# Repo (excluding models — they get mounted as a volume)
|
| 14 |
+
COPY . /app/
|
| 15 |
+
|
| 16 |
+
# Models live at /app/models — mount your local models/ dir as this volume
|
| 17 |
+
VOLUME /app/models
|
| 18 |
+
|
| 19 |
+
EXPOSE 8000
|
| 20 |
+
|
| 21 |
+
# Pre-warm not done in image — runs in lifespan handler at startup
|
| 22 |
+
ENV PYTHONIOENCODING=utf-8
|
| 23 |
+
ENV OLLAMA_MAX_LOADED_MODELS=1
|
| 24 |
+
|
| 25 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
FINAL_SUBMIT/docker/docker-compose.yml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: "3.9"
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
api:
|
| 5 |
+
build:
|
| 6 |
+
context: ../..
|
| 7 |
+
dockerfile: FINAL_SUBMIT/docker/Dockerfile.api
|
| 8 |
+
container_name: supplymind-api
|
| 9 |
+
ports:
|
| 10 |
+
- "8000:8000"
|
| 11 |
+
env_file:
|
| 12 |
+
- ../../.env
|
| 13 |
+
volumes:
|
| 14 |
+
- ../../models:/app/models:ro
|
| 15 |
+
- ../../tests/receipts:/app/tests/receipts
|
| 16 |
+
environment:
|
| 17 |
+
- PYTHONIOENCODING=utf-8
|
| 18 |
+
- OLLAMA_MAX_LOADED_MODELS=1
|
| 19 |
+
- OLLAMA_BASE_URL=http://ollama:11434
|
| 20 |
+
depends_on:
|
| 21 |
+
- ollama
|
| 22 |
+
restart: unless-stopped
|
| 23 |
+
|
| 24 |
+
ollama:
|
| 25 |
+
image: ollama/ollama:latest
|
| 26 |
+
container_name: supplymind-ollama
|
| 27 |
+
ports:
|
| 28 |
+
- "11434:11434"
|
| 29 |
+
volumes:
|
| 30 |
+
- ollama-data:/root/.ollama
|
| 31 |
+
restart: unless-stopped
|
| 32 |
+
deploy:
|
| 33 |
+
resources:
|
| 34 |
+
reservations:
|
| 35 |
+
devices:
|
| 36 |
+
- driver: nvidia
|
| 37 |
+
count: 1
|
| 38 |
+
capabilities: [gpu]
|
| 39 |
+
|
| 40 |
+
volumes:
|
| 41 |
+
ollama-data:
|
FINAL_SUBMIT/receipts/F2_multi_agent_apple_samsung_toyota.json
CHANGED
|
@@ -1,117 +1,117 @@
|
|
| 1 |
-
{
|
| 2 |
-
"constants": {
|
| 3 |
-
"cap_total_wafers_week": 1000,
|
| 4 |
-
"wafer_revenue_usd": 16500,
|
| 5 |
-
"shortfall_loss_usd_per_wafer": 55000,
|
| 6 |
-
"crisis_duration_weeks": 6
|
| 7 |
-
},
|
| 8 |
-
"narrative": "2021-chip-shortage dynamic: TSMC backup capacity (1000 wafers/week) contested by Apple (aggressive) + Samsung (conservative) + Toyota (reactive). Apple bids hard early, captures >50% of step-1 capacity. Toyota waits, pays higher step-2 prices. Samsung splits budget.",
|
| 9 |
-
"step_log": [
|
| 10 |
-
{
|
| 11 |
-
"event": "step_1_open",
|
| 12 |
-
"capacity_remaining": 1000,
|
| 13 |
-
"price_signal": 1.0
|
| 14 |
-
},
|
| 15 |
-
{
|
| 16 |
-
"event": "step_1_bid",
|
| 17 |
-
"agent": "Apple",
|
| 18 |
-
"bid_usd": 15399999.999999998
|
| 19 |
-
},
|
| 20 |
-
{
|
| 21 |
-
"event": "step_1_bid",
|
| 22 |
-
"agent": "Samsung",
|
| 23 |
-
"bid_usd": 3500000.0
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"event": "step_1_bid",
|
| 27 |
-
"agent": "Toyota",
|
| 28 |
-
"bid_usd": 0.0
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"event": "step_1_allocated",
|
| 32 |
-
"agent": "Apple",
|
| 33 |
-
"allocated_wafers": 407.4074074074074
|
| 34 |
-
},
|
| 35 |
-
{
|
| 36 |
-
"event": "step_1_allocated",
|
| 37 |
-
"agent": "Samsung",
|
| 38 |
-
"allocated_wafers": 92.59259259259258
|
| 39 |
-
},
|
| 40 |
-
{
|
| 41 |
-
"event": "step_1_allocated",
|
| 42 |
-
"agent": "Toyota",
|
| 43 |
-
"allocated_wafers": 0.0
|
| 44 |
-
},
|
| 45 |
-
{
|
| 46 |
-
"event": "step_2_open",
|
| 47 |
-
"capacity_remaining": 500.0,
|
| 48 |
-
"price_signal": 2.291
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"event": "step_2_bid",
|
| 52 |
-
"agent": "Apple",
|
| 53 |
-
"bid_usd": 3300000.0
|
| 54 |
-
},
|
| 55 |
-
{
|
| 56 |
-
"event": "step_2_bid",
|
| 57 |
-
"agent": "Samsung",
|
| 58 |
-
"bid_usd": 2800000.0
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"event": "step_2_bid",
|
| 62 |
-
"agent": "Toyota",
|
| 63 |
-
"bid_usd": 1833333.3333333333
|
| 64 |
-
}
|
| 65 |
-
],
|
| 66 |
-
"outcomes": [
|
| 67 |
-
{
|
| 68 |
-
"name": "Apple",
|
| 69 |
-
"strategy": "aggressive",
|
| 70 |
-
"budget_usd": 22000000,
|
| 71 |
-
"bid_usd": 18700000.0,
|
| 72 |
-
"allocated_wafers": 615.4,
|
| 73 |
-
"revenue_earned_usd": 60923669.0,
|
| 74 |
-
"shortfall_loss_usd": 39486850.0,
|
| 75 |
-
"net_pnl_usd": 2736819.0
|
| 76 |
-
},
|
| 77 |
-
{
|
| 78 |
-
"name": "Samsung",
|
| 79 |
-
"strategy": "conservative",
|
| 80 |
-
"budget_usd": 14000000,
|
| 81 |
-
"bid_usd": 6300000.0,
|
| 82 |
-
"allocated_wafers": 269.1,
|
| 83 |
-
"revenue_earned_usd": 26637255.0,
|
| 84 |
-
"shortfall_loss_usd": 31868192.0,
|
| 85 |
-
"net_pnl_usd": -11530937.0
|
| 86 |
-
},
|
| 87 |
-
{
|
| 88 |
-
"name": "Toyota",
|
| 89 |
-
"strategy": "reactive",
|
| 90 |
-
"budget_usd": 7000000,
|
| 91 |
-
"bid_usd": 1833333.0,
|
| 92 |
-
"allocated_wafers": 115.5,
|
| 93 |
-
"revenue_earned_usd": 11439076.0,
|
| 94 |
-
"shortfall_loss_usd": 16978291.0,
|
| 95 |
-
"net_pnl_usd": -7372549.0
|
| 96 |
-
}
|
| 97 |
-
],
|
| 98 |
-
"ranking": [
|
| 99 |
-
{
|
| 100 |
-
"rank": 1,
|
| 101 |
-
"agent": "Apple",
|
| 102 |
-
"net_pnl_usd": 2736819.0
|
| 103 |
-
},
|
| 104 |
-
{
|
| 105 |
-
"rank": 2,
|
| 106 |
-
"agent": "Toyota",
|
| 107 |
-
"net_pnl_usd": -7372549.0
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"rank": 3,
|
| 111 |
-
"agent": "Samsung",
|
| 112 |
-
"net_pnl_usd": -11530937.0
|
| 113 |
-
}
|
| 114 |
-
],
|
| 115 |
-
"winner": "Apple",
|
| 116 |
-
"loser": "Samsung"
|
| 117 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"constants": {
|
| 3 |
+
"cap_total_wafers_week": 1000,
|
| 4 |
+
"wafer_revenue_usd": 16500,
|
| 5 |
+
"shortfall_loss_usd_per_wafer": 55000,
|
| 6 |
+
"crisis_duration_weeks": 6
|
| 7 |
+
},
|
| 8 |
+
"narrative": "2021-chip-shortage dynamic: TSMC backup capacity (1000 wafers/week) contested by Apple (aggressive) + Samsung (conservative) + Toyota (reactive). Apple bids hard early, captures >50% of step-1 capacity. Toyota waits, pays higher step-2 prices. Samsung splits budget.",
|
| 9 |
+
"step_log": [
|
| 10 |
+
{
|
| 11 |
+
"event": "step_1_open",
|
| 12 |
+
"capacity_remaining": 1000,
|
| 13 |
+
"price_signal": 1.0
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"event": "step_1_bid",
|
| 17 |
+
"agent": "Apple",
|
| 18 |
+
"bid_usd": 15399999.999999998
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"event": "step_1_bid",
|
| 22 |
+
"agent": "Samsung",
|
| 23 |
+
"bid_usd": 3500000.0
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"event": "step_1_bid",
|
| 27 |
+
"agent": "Toyota",
|
| 28 |
+
"bid_usd": 0.0
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"event": "step_1_allocated",
|
| 32 |
+
"agent": "Apple",
|
| 33 |
+
"allocated_wafers": 407.4074074074074
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"event": "step_1_allocated",
|
| 37 |
+
"agent": "Samsung",
|
| 38 |
+
"allocated_wafers": 92.59259259259258
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"event": "step_1_allocated",
|
| 42 |
+
"agent": "Toyota",
|
| 43 |
+
"allocated_wafers": 0.0
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"event": "step_2_open",
|
| 47 |
+
"capacity_remaining": 500.0,
|
| 48 |
+
"price_signal": 2.291
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"event": "step_2_bid",
|
| 52 |
+
"agent": "Apple",
|
| 53 |
+
"bid_usd": 3300000.0
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"event": "step_2_bid",
|
| 57 |
+
"agent": "Samsung",
|
| 58 |
+
"bid_usd": 2800000.0
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"event": "step_2_bid",
|
| 62 |
+
"agent": "Toyota",
|
| 63 |
+
"bid_usd": 1833333.3333333333
|
| 64 |
+
}
|
| 65 |
+
],
|
| 66 |
+
"outcomes": [
|
| 67 |
+
{
|
| 68 |
+
"name": "Apple",
|
| 69 |
+
"strategy": "aggressive",
|
| 70 |
+
"budget_usd": 22000000,
|
| 71 |
+
"bid_usd": 18700000.0,
|
| 72 |
+
"allocated_wafers": 615.4,
|
| 73 |
+
"revenue_earned_usd": 60923669.0,
|
| 74 |
+
"shortfall_loss_usd": 39486850.0,
|
| 75 |
+
"net_pnl_usd": 2736819.0
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "Samsung",
|
| 79 |
+
"strategy": "conservative",
|
| 80 |
+
"budget_usd": 14000000,
|
| 81 |
+
"bid_usd": 6300000.0,
|
| 82 |
+
"allocated_wafers": 269.1,
|
| 83 |
+
"revenue_earned_usd": 26637255.0,
|
| 84 |
+
"shortfall_loss_usd": 31868192.0,
|
| 85 |
+
"net_pnl_usd": -11530937.0
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"name": "Toyota",
|
| 89 |
+
"strategy": "reactive",
|
| 90 |
+
"budget_usd": 7000000,
|
| 91 |
+
"bid_usd": 1833333.0,
|
| 92 |
+
"allocated_wafers": 115.5,
|
| 93 |
+
"revenue_earned_usd": 11439076.0,
|
| 94 |
+
"shortfall_loss_usd": 16978291.0,
|
| 95 |
+
"net_pnl_usd": -7372549.0
|
| 96 |
+
}
|
| 97 |
+
],
|
| 98 |
+
"ranking": [
|
| 99 |
+
{
|
| 100 |
+
"rank": 1,
|
| 101 |
+
"agent": "Apple",
|
| 102 |
+
"net_pnl_usd": 2736819.0
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"rank": 2,
|
| 106 |
+
"agent": "Toyota",
|
| 107 |
+
"net_pnl_usd": -7372549.0
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"rank": 3,
|
| 111 |
+
"agent": "Samsung",
|
| 112 |
+
"net_pnl_usd": -11530937.0
|
| 113 |
+
}
|
| 114 |
+
],
|
| 115 |
+
"winner": "Apple",
|
| 116 |
+
"loser": "Samsung"
|
| 117 |
}
|
FINAL_SUBMIT/receipts/ONNX_BUNDLE_MANIFEST.json
CHANGED
|
@@ -1,72 +1,72 @@
|
|
| 1 |
-
{
|
| 2 |
-
"exported": [
|
| 3 |
-
{
|
| 4 |
-
"name": "ppo_easy_typhoon_response (MaskablePPO)",
|
| 5 |
-
"file": "ppo_easy_typhoon_response.onnx",
|
| 6 |
-
"size_kb": 948,
|
| 7 |
-
"input_shape": [
|
| 8 |
-
1,
|
| 9 |
-
408
|
| 10 |
-
],
|
| 11 |
-
"output_shape": [
|
| 12 |
-
1,
|
| 13 |
-
280
|
| 14 |
-
],
|
| 15 |
-
"source": "v3_arcadia/50_gethsemane/export_v3_ppo_onnx.py"
|
| 16 |
-
},
|
| 17 |
-
{
|
| 18 |
-
"name": "ppo_medium_multi_front (MaskablePPO)",
|
| 19 |
-
"file": "ppo_medium_multi_front.onnx",
|
| 20 |
-
"size_kb": 948,
|
| 21 |
-
"input_shape": [
|
| 22 |
-
1,
|
| 23 |
-
408
|
| 24 |
-
],
|
| 25 |
-
"output_shape": [
|
| 26 |
-
1,
|
| 27 |
-
280
|
| 28 |
-
],
|
| 29 |
-
"source": "v3_arcadia/50_gethsemane/export_v3_ppo_onnx.py"
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"name": "ppo_hard_cascading_crisis (MaskablePPO)",
|
| 33 |
-
"file": "ppo_hard_cascading_crisis.onnx",
|
| 34 |
-
"size_kb": 948,
|
| 35 |
-
"input_shape": [
|
| 36 |
-
1,
|
| 37 |
-
408
|
| 38 |
-
],
|
| 39 |
-
"output_shape": [
|
| 40 |
-
1,
|
| 41 |
-
280
|
| 42 |
-
],
|
| 43 |
-
"source": "v3_arcadia/50_gethsemane/export_v3_ppo_onnx.py"
|
| 44 |
-
},
|
| 45 |
-
{
|
| 46 |
-
"name": "GCN arrival-time regressor",
|
| 47 |
-
"file": "gcn_arrival.onnx",
|
| 48 |
-
"size_kb": 10,
|
| 49 |
-
"input_shape": [
|
| 50 |
-
"[N, 4]",
|
| 51 |
-
"[N, N]"
|
| 52 |
-
],
|
| 53 |
-
"output_shape": [
|
| 54 |
-
"[N]"
|
| 55 |
-
],
|
| 56 |
-
"source": "v3_arcadia/70_provider/r6_gnn_arrival_time.py"
|
| 57 |
-
}
|
| 58 |
-
],
|
| 59 |
-
"skipped": [
|
| 60 |
-
{
|
| 61 |
-
"name": "Ridge stacker",
|
| 62 |
-
"reason": "skl2onnx not installed: No module named 'skl2onnx'"
|
| 63 |
-
},
|
| 64 |
-
{
|
| 65 |
-
"name": "TFT v1",
|
| 66 |
-
"reason": "pytorch-forecasting TimeSeriesDataSet is required at inference; ONNX export requires a wrapper that packages the normalizer scaler + encoder/decoder split. Deferred as v4 work."
|
| 67 |
-
}
|
| 68 |
-
],
|
| 69 |
-
"elapsed_s": 0.8302168846130371,
|
| 70 |
-
"bundle_dir": "v3_arcadia\
|
| 71 |
-
"total_bundle_size_kb": 2854
|
| 72 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exported": [
|
| 3 |
+
{
|
| 4 |
+
"name": "ppo_easy_typhoon_response (MaskablePPO)",
|
| 5 |
+
"file": "ppo_easy_typhoon_response.onnx",
|
| 6 |
+
"size_kb": 948,
|
| 7 |
+
"input_shape": [
|
| 8 |
+
1,
|
| 9 |
+
408
|
| 10 |
+
],
|
| 11 |
+
"output_shape": [
|
| 12 |
+
1,
|
| 13 |
+
280
|
| 14 |
+
],
|
| 15 |
+
"source": "versions/v3_arcadia/50_gethsemane/export_v3_ppo_onnx.py"
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"name": "ppo_medium_multi_front (MaskablePPO)",
|
| 19 |
+
"file": "ppo_medium_multi_front.onnx",
|
| 20 |
+
"size_kb": 948,
|
| 21 |
+
"input_shape": [
|
| 22 |
+
1,
|
| 23 |
+
408
|
| 24 |
+
],
|
| 25 |
+
"output_shape": [
|
| 26 |
+
1,
|
| 27 |
+
280
|
| 28 |
+
],
|
| 29 |
+
"source": "versions/v3_arcadia/50_gethsemane/export_v3_ppo_onnx.py"
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"name": "ppo_hard_cascading_crisis (MaskablePPO)",
|
| 33 |
+
"file": "ppo_hard_cascading_crisis.onnx",
|
| 34 |
+
"size_kb": 948,
|
| 35 |
+
"input_shape": [
|
| 36 |
+
1,
|
| 37 |
+
408
|
| 38 |
+
],
|
| 39 |
+
"output_shape": [
|
| 40 |
+
1,
|
| 41 |
+
280
|
| 42 |
+
],
|
| 43 |
+
"source": "versions/v3_arcadia/50_gethsemane/export_v3_ppo_onnx.py"
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"name": "GCN arrival-time regressor",
|
| 47 |
+
"file": "gcn_arrival.onnx",
|
| 48 |
+
"size_kb": 10,
|
| 49 |
+
"input_shape": [
|
| 50 |
+
"[N, 4]",
|
| 51 |
+
"[N, N]"
|
| 52 |
+
],
|
| 53 |
+
"output_shape": [
|
| 54 |
+
"[N]"
|
| 55 |
+
],
|
| 56 |
+
"source": "versions/v3_arcadia/70_provider/r6_gnn_arrival_time.py"
|
| 57 |
+
}
|
| 58 |
+
],
|
| 59 |
+
"skipped": [
|
| 60 |
+
{
|
| 61 |
+
"name": "Ridge stacker",
|
| 62 |
+
"reason": "skl2onnx not installed: No module named 'skl2onnx'"
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"name": "TFT v1",
|
| 66 |
+
"reason": "pytorch-forecasting TimeSeriesDataSet is required at inference; ONNX export requires a wrapper that packages the normalizer scaler + encoder/decoder split. Deferred as v4 work."
|
| 67 |
+
}
|
| 68 |
+
],
|
| 69 |
+
"elapsed_s": 0.8302168846130371,
|
| 70 |
+
"bundle_dir": "versions/v3_arcadia/\checkpoints\\onnx_bundle",
|
| 71 |
+
"total_bundle_size_kb": 2854
|
| 72 |
}
|
FINAL_SUBMIT/receipts/R2_SHAP_FAIRNESS_CALIBRATION.json
CHANGED
|
@@ -1,502 +1,502 @@
|
|
| 1 |
-
{
|
| 2 |
-
"shap_top15": {
|
| 3 |
-
"late_delivery_risk": {
|
| 4 |
-
"algo": "xgb",
|
| 5 |
-
"top15_features": [
|
| 6 |
-
{
|
| 7 |
-
"name": "Shipping Mode__First Class",
|
| 8 |
-
"importance": 0.7326152324676514
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"name": "sched_days",
|
| 12 |
-
"importance": 0.6606742739677429
|
| 13 |
-
},
|
| 14 |
-
{
|
| 15 |
-
"name": "Type__TRANSFER",
|
| 16 |
-
"importance": 0.47632965445518494
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"name": "Order Customer Id",
|
| 20 |
-
"importance": 0.17082303762435913
|
| 21 |
-
},
|
| 22 |
-
{
|
| 23 |
-
"name": "Latitude",
|
| 24 |
-
"importance": 0.160926952958107
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
"name": "Shipping Mode__Second Class",
|
| 28 |
-
"importance": 0.14983786642551422
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"name": "Longitude",
|
| 32 |
-
"importance": 0.13300901651382446
|
| 33 |
-
},
|
| 34 |
-
{
|
| 35 |
-
"name": "Shipping Mode__Standard Class",
|
| 36 |
-
"importance": 0.12997667491436005
|
| 37 |
-
},
|
| 38 |
-
{
|
| 39 |
-
"name": "order_day",
|
| 40 |
-
"importance": 0.10712296515703201
|
| 41 |
-
},
|
| 42 |
-
{
|
| 43 |
-
"name": "order_month",
|
| 44 |
-
"importance": 0.07108364999294281
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"name": "order_dow",
|
| 48 |
-
"importance": 0.06861100345849991
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"name": "Order Item Total",
|
| 52 |
-
"importance": 0.0614430233836174
|
| 53 |
-
},
|
| 54 |
-
{
|
| 55 |
-
"name": "Type__DEBIT",
|
| 56 |
-
"importance": 0.05896211788058281
|
| 57 |
-
},
|
| 58 |
-
{
|
| 59 |
-
"name": "Sales",
|
| 60 |
-
"importance": 0.04449347406625748
|
| 61 |
-
},
|
| 62 |
-
{
|
| 63 |
-
"name": "Order Item Discount",
|
| 64 |
-
"importance": 0.04405033215880394
|
| 65 |
-
}
|
| 66 |
-
],
|
| 67 |
-
"n_samples": 1000
|
| 68 |
-
},
|
| 69 |
-
"shipping_mode": {
|
| 70 |
-
"algo": "lgb",
|
| 71 |
-
"top15_features": [
|
| 72 |
-
{
|
| 73 |
-
"name": "order_day",
|
| 74 |
-
"importance": 0.14531971700119595
|
| 75 |
-
},
|
| 76 |
-
{
|
| 77 |
-
"name": "Latitude",
|
| 78 |
-
"importance": 0.13565060253209485
|
| 79 |
-
},
|
| 80 |
-
{
|
| 81 |
-
"name": "Order Customer Id",
|
| 82 |
-
"importance": 0.13102491053295864
|
| 83 |
-
},
|
| 84 |
-
{
|
| 85 |
-
"name": "Longitude",
|
| 86 |
-
"importance": 0.1222981746063068
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"name": "Order Zipcode",
|
| 90 |
-
"importance": 0.09815205910031981
|
| 91 |
-
},
|
| 92 |
-
{
|
| 93 |
-
"name": "order_month",
|
| 94 |
-
"importance": 0.09317142717955136
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"name": "order_dow",
|
| 98 |
-
"importance": 0.07841270762869156
|
| 99 |
-
},
|
| 100 |
-
{
|
| 101 |
-
"name": "Order Item Total",
|
| 102 |
-
"importance": 0.044599598632655106
|
| 103 |
-
},
|
| 104 |
-
{
|
| 105 |
-
"name": "Order Item Discount",
|
| 106 |
-
"importance": 0.033594561793665254
|
| 107 |
-
},
|
| 108 |
-
{
|
| 109 |
-
"name": "order_year",
|
| 110 |
-
"importance": 0.029623813091121495
|
| 111 |
-
},
|
| 112 |
-
{
|
| 113 |
-
"name": "Customer Segment__Home Office",
|
| 114 |
-
"importance": 0.02582491478215546
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"name": "Type__DEBIT",
|
| 118 |
-
"importance": 0.019900877735072642
|
| 119 |
-
},
|
| 120 |
-
{
|
| 121 |
-
"name": "Order Item Discount Rate",
|
| 122 |
-
"importance": 0.019821976340370435
|
| 123 |
-
},
|
| 124 |
-
{
|
| 125 |
-
"name": "Customer Segment__Consumer",
|
| 126 |
-
"importance": 0.019363164732533623
|
| 127 |
-
},
|
| 128 |
-
{
|
| 129 |
-
"name": "Sales",
|
| 130 |
-
"importance": 0.019305355520423926
|
| 131 |
-
}
|
| 132 |
-
],
|
| 133 |
-
"n_samples": 1000
|
| 134 |
-
},
|
| 135 |
-
"delivery_status": {
|
| 136 |
-
"algo": "lgb",
|
| 137 |
-
"top15_features": [
|
| 138 |
-
{
|
| 139 |
-
"name": "sched_days",
|
| 140 |
-
"importance": 1.0622776241691645
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"name": "Type__TRANSFER",
|
| 144 |
-
"importance": 0.9869317661543312
|
| 145 |
-
},
|
| 146 |
-
{
|
| 147 |
-
"name": "Shipping Mode__First Class",
|
| 148 |
-
"importance": 0.5401095981609848
|
| 149 |
-
},
|
| 150 |
-
{
|
| 151 |
-
"name": "Latitude",
|
| 152 |
-
"importance": 0.1469638826819572
|
| 153 |
-
},
|
| 154 |
-
{
|
| 155 |
-
"name": "Order Customer Id",
|
| 156 |
-
"importance": 0.12387527105673957
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"name": "Longitude",
|
| 160 |
-
"importance": 0.12152826063388397
|
| 161 |
-
},
|
| 162 |
-
{
|
| 163 |
-
"name": "Shipping Mode__Standard Class",
|
| 164 |
-
"importance": 0.11399112380975975
|
| 165 |
-
},
|
| 166 |
-
{
|
| 167 |
-
"name": "Type__DEBIT",
|
| 168 |
-
"importance": 0.11226916777330752
|
| 169 |
-
},
|
| 170 |
-
{
|
| 171 |
-
"name": "order_day",
|
| 172 |
-
"importance": 0.08720905988856538
|
| 173 |
-
},
|
| 174 |
-
{
|
| 175 |
-
"name": "Type__PAYMENT",
|
| 176 |
-
"importance": 0.07393674075739048
|
| 177 |
-
},
|
| 178 |
-
{
|
| 179 |
-
"name": "order_month",
|
| 180 |
-
"importance": 0.05996037188478746
|
| 181 |
-
},
|
| 182 |
-
{
|
| 183 |
-
"name": "order_dow",
|
| 184 |
-
"importance": 0.055766425673077755
|
| 185 |
-
},
|
| 186 |
-
{
|
| 187 |
-
"name": "Shipping Mode__Second Class",
|
| 188 |
-
"importance": 0.05278020082991879
|
| 189 |
-
},
|
| 190 |
-
{
|
| 191 |
-
"name": "Type__CASH",
|
| 192 |
-
"importance": 0.045583216438798695
|
| 193 |
-
},
|
| 194 |
-
{
|
| 195 |
-
"name": "Order Item Total",
|
| 196 |
-
"importance": 0.043191257310719586
|
| 197 |
-
}
|
| 198 |
-
],
|
| 199 |
-
"n_samples": 1000
|
| 200 |
-
}
|
| 201 |
-
},
|
| 202 |
-
"fairness": {
|
| 203 |
-
"late_delivery_risk": {
|
| 204 |
-
"Market": {
|
| 205 |
-
"Africa": {
|
| 206 |
-
"n": 1768,
|
| 207 |
-
"accuracy": 0.869343891402715
|
| 208 |
-
},
|
| 209 |
-
"Europe": {
|
| 210 |
-
"n": 7437,
|
| 211 |
-
"accuracy": 0.8284254403657388
|
| 212 |
-
},
|
| 213 |
-
"LATAM": {
|
| 214 |
-
"n": 7771,
|
| 215 |
-
"accuracy": 0.8390168575472912
|
| 216 |
-
},
|
| 217 |
-
"Pacific Asia": {
|
| 218 |
-
"n": 6263,
|
| 219 |
-
"accuracy": 0.8112725530895737
|
| 220 |
-
},
|
| 221 |
-
"USCA": {
|
| 222 |
-
"n": 3839,
|
| 223 |
-
"accuracy": 0.8767908309455588
|
| 224 |
-
},
|
| 225 |
-
"__summary__": {
|
| 226 |
-
"max_acc": 0.8767908309455588,
|
| 227 |
-
"min_acc": 0.8112725530895737,
|
| 228 |
-
"disparity": 0.06551827785598507
|
| 229 |
-
}
|
| 230 |
-
},
|
| 231 |
-
"Customer Segment": {
|
| 232 |
-
"Consumer": {
|
| 233 |
-
"n": 13998,
|
| 234 |
-
"accuracy": 0.8350478639805686
|
| 235 |
-
},
|
| 236 |
-
"Corporate": {
|
| 237 |
-
"n": 8212,
|
| 238 |
-
"accuracy": 0.8364588407208963
|
| 239 |
-
},
|
| 240 |
-
"Home Office": {
|
| 241 |
-
"n": 4868,
|
| 242 |
-
"accuracy": 0.8436729663105998
|
| 243 |
-
},
|
| 244 |
-
"__summary__": {
|
| 245 |
-
"max_acc": 0.8436729663105998,
|
| 246 |
-
"min_acc": 0.8350478639805686,
|
| 247 |
-
"disparity": 0.00862510233003122
|
| 248 |
-
}
|
| 249 |
-
}
|
| 250 |
-
},
|
| 251 |
-
"shipping_mode": {
|
| 252 |
-
"Market": {
|
| 253 |
-
"Africa": {
|
| 254 |
-
"n": 1721,
|
| 255 |
-
"accuracy": 0.8059267867518884
|
| 256 |
-
},
|
| 257 |
-
"Europe": {
|
| 258 |
-
"n": 7650,
|
| 259 |
-
"accuracy": 0.7586928104575164
|
| 260 |
-
},
|
| 261 |
-
"LATAM": {
|
| 262 |
-
"n": 7701,
|
| 263 |
-
"accuracy": 0.7809375405791456
|
| 264 |
-
},
|
| 265 |
-
"Pacific Asia": {
|
| 266 |
-
"n": 6143,
|
| 267 |
-
"accuracy": 0.7584242226924955
|
| 268 |
-
},
|
| 269 |
-
"USCA": {
|
| 270 |
-
"n": 3863,
|
| 271 |
-
"accuracy": 0.8193114159979291
|
| 272 |
-
},
|
| 273 |
-
"__summary__": {
|
| 274 |
-
"max_acc": 0.8193114159979291,
|
| 275 |
-
"min_acc": 0.7584242226924955,
|
| 276 |
-
"disparity": 0.06088719330543357
|
| 277 |
-
}
|
| 278 |
-
},
|
| 279 |
-
"Customer Segment": {
|
| 280 |
-
"Consumer": {
|
| 281 |
-
"n": 14008,
|
| 282 |
-
"accuracy": 0.7669902912621359
|
| 283 |
-
},
|
| 284 |
-
"Corporate": {
|
| 285 |
-
"n": 8269,
|
| 286 |
-
"accuracy": 0.7872777844963115
|
| 287 |
-
},
|
| 288 |
-
"Home Office": {
|
| 289 |
-
"n": 4801,
|
| 290 |
-
"accuracy": 0.7862945219745886
|
| 291 |
-
},
|
| 292 |
-
"__summary__": {
|
| 293 |
-
"max_acc": 0.7872777844963115,
|
| 294 |
-
"min_acc": 0.7669902912621359,
|
| 295 |
-
"disparity": 0.020287493234175558
|
| 296 |
-
}
|
| 297 |
-
}
|
| 298 |
-
},
|
| 299 |
-
"delivery_status": {
|
| 300 |
-
"Market": {
|
| 301 |
-
"Africa": {
|
| 302 |
-
"n": 1767,
|
| 303 |
-
"accuracy": 0.8687040181097906
|
| 304 |
-
},
|
| 305 |
-
"Europe": {
|
| 306 |
-
"n": 7505,
|
| 307 |
-
"accuracy": 0.8282478347768154
|
| 308 |
-
},
|
| 309 |
-
"LATAM": {
|
| 310 |
-
"n": 7746,
|
| 311 |
-
"accuracy": 0.8502452878905241
|
| 312 |
-
},
|
| 313 |
-
"Pacific Asia": {
|
| 314 |
-
"n": 6142,
|
| 315 |
-
"accuracy": 0.8150439596222728
|
| 316 |
-
},
|
| 317 |
-
"USCA": {
|
| 318 |
-
"n": 3918,
|
| 319 |
-
"accuracy": 0.8769780500255232
|
| 320 |
-
},
|
| 321 |
-
"__summary__": {
|
| 322 |
-
"max_acc": 0.8769780500255232,
|
| 323 |
-
"min_acc": 0.8150439596222728,
|
| 324 |
-
"disparity": 0.061934090403250375
|
| 325 |
-
}
|
| 326 |
-
},
|
| 327 |
-
"Customer Segment": {
|
| 328 |
-
"Consumer": {
|
| 329 |
-
"n": 14087,
|
| 330 |
-
"accuracy": 0.8335344643998013
|
| 331 |
-
},
|
| 332 |
-
"Corporate": {
|
| 333 |
-
"n": 8197,
|
| 334 |
-
"accuracy": 0.8446992802244724
|
| 335 |
-
},
|
| 336 |
-
"Home Office": {
|
| 337 |
-
"n": 4794,
|
| 338 |
-
"accuracy": 0.8579474342928661
|
| 339 |
-
},
|
| 340 |
-
"__summary__": {
|
| 341 |
-
"max_acc": 0.8579474342928661,
|
| 342 |
-
"min_acc": 0.8335344643998013,
|
| 343 |
-
"disparity": 0.02441296989306485
|
| 344 |
-
}
|
| 345 |
-
}
|
| 346 |
-
}
|
| 347 |
-
},
|
| 348 |
-
"calibration": {
|
| 349 |
-
"late_delivery_risk": {
|
| 350 |
-
"algo": "xgb",
|
| 351 |
-
"n_bins": 15,
|
| 352 |
-
"bin_confidence": [
|
| 353 |
-
0.047601889818906784,
|
| 354 |
-
0.10591482371091843,
|
| 355 |
-
0.1693299263715744,
|
| 356 |
-
0.23376236855983734,
|
| 357 |
-
0.2985405921936035,
|
| 358 |
-
0.365536093711853,
|
| 359 |
-
0.43266668915748596,
|
| 360 |
-
0.49862194061279297,
|
| 361 |
-
0.5664905309677124,
|
| 362 |
-
0.6322769522666931,
|
| 363 |
-
0.700205385684967,
|
| 364 |
-
0.7678216695785522,
|
| 365 |
-
0.834970235824585,
|
| 366 |
-
0.9012444019317627,
|
| 367 |
-
0.9871050715446472
|
| 368 |
-
],
|
| 369 |
-
"bin_accuracy": [
|
| 370 |
-
0.04878048780487805,
|
| 371 |
-
0.03429602888086643,
|
| 372 |
-
0.06657608695652174,
|
| 373 |
-
0.10221205186880244,
|
| 374 |
-
0.1659671880961465,
|
| 375 |
-
0.3065795613625758,
|
| 376 |
-
0.4490950226244344,
|
| 377 |
-
0.6264543784445805,
|
| 378 |
-
0.7001414427157001,
|
| 379 |
-
0.7884012539184952,
|
| 380 |
-
0.8334786399302528,
|
| 381 |
-
0.8685524126455907,
|
| 382 |
-
0.920274914089347,
|
| 383 |
-
0.9493734335839599,
|
| 384 |
-
0.9918243401074516
|
| 385 |
-
],
|
| 386 |
-
"bin_n": [
|
| 387 |
-
205,
|
| 388 |
-
1108,
|
| 389 |
-
2208,
|
| 390 |
-
2622,
|
| 391 |
-
2621,
|
| 392 |
-
2143,
|
| 393 |
-
1768,
|
| 394 |
-
1633,
|
| 395 |
-
1414,
|
| 396 |
-
1276,
|
| 397 |
-
1147,
|
| 398 |
-
1202,
|
| 399 |
-
1455,
|
| 400 |
-
1995,
|
| 401 |
-
4281
|
| 402 |
-
],
|
| 403 |
-
"ece": 0.08366547522741584,
|
| 404 |
-
"brier": 0.12393409580512378,
|
| 405 |
-
"temperature_scaling_T": 0.6172709141132063
|
| 406 |
-
},
|
| 407 |
-
"shipping_mode": {
|
| 408 |
-
"algo": "lgb",
|
| 409 |
-
"n_bins": 15,
|
| 410 |
-
"bin_confidence": [
|
| 411 |
-
0.3121110714805393,
|
| 412 |
-
0.37706821969221477,
|
| 413 |
-
0.44009373318135214,
|
| 414 |
-
0.5003264091242992,
|
| 415 |
-
0.5668423455793702,
|
| 416 |
-
0.6341087325686549,
|
| 417 |
-
0.7010409508680902,
|
| 418 |
-
0.7664726820296514,
|
| 419 |
-
0.8315982324325599,
|
| 420 |
-
0.8946591419686111,
|
| 421 |
-
0.9531121751216614
|
| 422 |
-
],
|
| 423 |
-
"bin_accuracy": [
|
| 424 |
-
0.2,
|
| 425 |
-
0.3730886850152905,
|
| 426 |
-
0.45858343337334934,
|
| 427 |
-
0.49913164293157347,
|
| 428 |
-
0.5809395065900642,
|
| 429 |
-
0.7184009406231628,
|
| 430 |
-
0.8413356080916402,
|
| 431 |
-
0.9226793467025015,
|
| 432 |
-
0.9520665199315236,
|
| 433 |
-
0.9763365468886941,
|
| 434 |
-
0.9710982658959537
|
| 435 |
-
],
|
| 436 |
-
"bin_n": [
|
| 437 |
-
15,
|
| 438 |
-
327,
|
| 439 |
-
1666,
|
| 440 |
-
2879,
|
| 441 |
-
2959,
|
| 442 |
-
3402,
|
| 443 |
-
4103,
|
| 444 |
-
4837,
|
| 445 |
-
4089,
|
| 446 |
-
2282,
|
| 447 |
-
519
|
| 448 |
-
],
|
| 449 |
-
"ece": 0.08808701528421295,
|
| 450 |
-
"brier": 0.14974528304098794,
|
| 451 |
-
"temperature_scaling_T": 0.7013679012815588
|
| 452 |
-
},
|
| 453 |
-
"delivery_status": {
|
| 454 |
-
"algo": "lgb",
|
| 455 |
-
"n_bins": 15,
|
| 456 |
-
"bin_confidence": [
|
| 457 |
-
0.31674386091217493,
|
| 458 |
-
0.3747040640569195,
|
| 459 |
-
0.4360554176701256,
|
| 460 |
-
0.49978873696550224,
|
| 461 |
-
0.5660495258460405,
|
| 462 |
-
0.6325569759747155,
|
| 463 |
-
0.6996959611938123,
|
| 464 |
-
0.7661925883072682,
|
| 465 |
-
0.8343464222875331,
|
| 466 |
-
0.9017332581703068,
|
| 467 |
-
0.9839647836453121
|
| 468 |
-
],
|
| 469 |
-
"bin_accuracy": [
|
| 470 |
-
0.2222222222222222,
|
| 471 |
-
0.3987341772151899,
|
| 472 |
-
0.5257352941176471,
|
| 473 |
-
0.6634679020516214,
|
| 474 |
-
0.8109608047173084,
|
| 475 |
-
0.8790291998483125,
|
| 476 |
-
0.9103793247186328,
|
| 477 |
-
0.9274406332453826,
|
| 478 |
-
0.9517241379310345,
|
| 479 |
-
0.9663677130044843,
|
| 480 |
-
0.9874145990650846
|
| 481 |
-
],
|
| 482 |
-
"bin_n": [
|
| 483 |
-
54,
|
| 484 |
-
948,
|
| 485 |
-
2448,
|
| 486 |
-
3022,
|
| 487 |
-
2883,
|
| 488 |
-
2637,
|
| 489 |
-
2399,
|
| 490 |
-
2274,
|
| 491 |
-
2175,
|
| 492 |
-
2676,
|
| 493 |
-
5562
|
| 494 |
-
],
|
| 495 |
-
"ece": 0.12621462481898915,
|
| 496 |
-
"brier": 0.1285071700698595,
|
| 497 |
-
"temperature_scaling_T": 0.5595696359480499
|
| 498 |
-
}
|
| 499 |
-
},
|
| 500 |
-
"reliability_plot_saved": true,
|
| 501 |
-
"elapsed_min": 1.084403399626414
|
| 502 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"shap_top15": {
|
| 3 |
+
"late_delivery_risk": {
|
| 4 |
+
"algo": "xgb",
|
| 5 |
+
"top15_features": [
|
| 6 |
+
{
|
| 7 |
+
"name": "Shipping Mode__First Class",
|
| 8 |
+
"importance": 0.7326152324676514
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"name": "sched_days",
|
| 12 |
+
"importance": 0.6606742739677429
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"name": "Type__TRANSFER",
|
| 16 |
+
"importance": 0.47632965445518494
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"name": "Order Customer Id",
|
| 20 |
+
"importance": 0.17082303762435913
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"name": "Latitude",
|
| 24 |
+
"importance": 0.160926952958107
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"name": "Shipping Mode__Second Class",
|
| 28 |
+
"importance": 0.14983786642551422
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"name": "Longitude",
|
| 32 |
+
"importance": 0.13300901651382446
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"name": "Shipping Mode__Standard Class",
|
| 36 |
+
"importance": 0.12997667491436005
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"name": "order_day",
|
| 40 |
+
"importance": 0.10712296515703201
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"name": "order_month",
|
| 44 |
+
"importance": 0.07108364999294281
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"name": "order_dow",
|
| 48 |
+
"importance": 0.06861100345849991
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "Order Item Total",
|
| 52 |
+
"importance": 0.0614430233836174
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"name": "Type__DEBIT",
|
| 56 |
+
"importance": 0.05896211788058281
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"name": "Sales",
|
| 60 |
+
"importance": 0.04449347406625748
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "Order Item Discount",
|
| 64 |
+
"importance": 0.04405033215880394
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"n_samples": 1000
|
| 68 |
+
},
|
| 69 |
+
"shipping_mode": {
|
| 70 |
+
"algo": "lgb",
|
| 71 |
+
"top15_features": [
|
| 72 |
+
{
|
| 73 |
+
"name": "order_day",
|
| 74 |
+
"importance": 0.14531971700119595
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "Latitude",
|
| 78 |
+
"importance": 0.13565060253209485
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"name": "Order Customer Id",
|
| 82 |
+
"importance": 0.13102491053295864
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"name": "Longitude",
|
| 86 |
+
"importance": 0.1222981746063068
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"name": "Order Zipcode",
|
| 90 |
+
"importance": 0.09815205910031981
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"name": "order_month",
|
| 94 |
+
"importance": 0.09317142717955136
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"name": "order_dow",
|
| 98 |
+
"importance": 0.07841270762869156
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"name": "Order Item Total",
|
| 102 |
+
"importance": 0.044599598632655106
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"name": "Order Item Discount",
|
| 106 |
+
"importance": 0.033594561793665254
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"name": "order_year",
|
| 110 |
+
"importance": 0.029623813091121495
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"name": "Customer Segment__Home Office",
|
| 114 |
+
"importance": 0.02582491478215546
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"name": "Type__DEBIT",
|
| 118 |
+
"importance": 0.019900877735072642
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"name": "Order Item Discount Rate",
|
| 122 |
+
"importance": 0.019821976340370435
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"name": "Customer Segment__Consumer",
|
| 126 |
+
"importance": 0.019363164732533623
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"name": "Sales",
|
| 130 |
+
"importance": 0.019305355520423926
|
| 131 |
+
}
|
| 132 |
+
],
|
| 133 |
+
"n_samples": 1000
|
| 134 |
+
},
|
| 135 |
+
"delivery_status": {
|
| 136 |
+
"algo": "lgb",
|
| 137 |
+
"top15_features": [
|
| 138 |
+
{
|
| 139 |
+
"name": "sched_days",
|
| 140 |
+
"importance": 1.0622776241691645
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"name": "Type__TRANSFER",
|
| 144 |
+
"importance": 0.9869317661543312
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"name": "Shipping Mode__First Class",
|
| 148 |
+
"importance": 0.5401095981609848
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"name": "Latitude",
|
| 152 |
+
"importance": 0.1469638826819572
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"name": "Order Customer Id",
|
| 156 |
+
"importance": 0.12387527105673957
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"name": "Longitude",
|
| 160 |
+
"importance": 0.12152826063388397
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"name": "Shipping Mode__Standard Class",
|
| 164 |
+
"importance": 0.11399112380975975
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"name": "Type__DEBIT",
|
| 168 |
+
"importance": 0.11226916777330752
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"name": "order_day",
|
| 172 |
+
"importance": 0.08720905988856538
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"name": "Type__PAYMENT",
|
| 176 |
+
"importance": 0.07393674075739048
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"name": "order_month",
|
| 180 |
+
"importance": 0.05996037188478746
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"name": "order_dow",
|
| 184 |
+
"importance": 0.055766425673077755
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"name": "Shipping Mode__Second Class",
|
| 188 |
+
"importance": 0.05278020082991879
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"name": "Type__CASH",
|
| 192 |
+
"importance": 0.045583216438798695
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"name": "Order Item Total",
|
| 196 |
+
"importance": 0.043191257310719586
|
| 197 |
+
}
|
| 198 |
+
],
|
| 199 |
+
"n_samples": 1000
|
| 200 |
+
}
|
| 201 |
+
},
|
| 202 |
+
"fairness": {
|
| 203 |
+
"late_delivery_risk": {
|
| 204 |
+
"Market": {
|
| 205 |
+
"Africa": {
|
| 206 |
+
"n": 1768,
|
| 207 |
+
"accuracy": 0.869343891402715
|
| 208 |
+
},
|
| 209 |
+
"Europe": {
|
| 210 |
+
"n": 7437,
|
| 211 |
+
"accuracy": 0.8284254403657388
|
| 212 |
+
},
|
| 213 |
+
"LATAM": {
|
| 214 |
+
"n": 7771,
|
| 215 |
+
"accuracy": 0.8390168575472912
|
| 216 |
+
},
|
| 217 |
+
"Pacific Asia": {
|
| 218 |
+
"n": 6263,
|
| 219 |
+
"accuracy": 0.8112725530895737
|
| 220 |
+
},
|
| 221 |
+
"USCA": {
|
| 222 |
+
"n": 3839,
|
| 223 |
+
"accuracy": 0.8767908309455588
|
| 224 |
+
},
|
| 225 |
+
"__summary__": {
|
| 226 |
+
"max_acc": 0.8767908309455588,
|
| 227 |
+
"min_acc": 0.8112725530895737,
|
| 228 |
+
"disparity": 0.06551827785598507
|
| 229 |
+
}
|
| 230 |
+
},
|
| 231 |
+
"Customer Segment": {
|
| 232 |
+
"Consumer": {
|
| 233 |
+
"n": 13998,
|
| 234 |
+
"accuracy": 0.8350478639805686
|
| 235 |
+
},
|
| 236 |
+
"Corporate": {
|
| 237 |
+
"n": 8212,
|
| 238 |
+
"accuracy": 0.8364588407208963
|
| 239 |
+
},
|
| 240 |
+
"Home Office": {
|
| 241 |
+
"n": 4868,
|
| 242 |
+
"accuracy": 0.8436729663105998
|
| 243 |
+
},
|
| 244 |
+
"__summary__": {
|
| 245 |
+
"max_acc": 0.8436729663105998,
|
| 246 |
+
"min_acc": 0.8350478639805686,
|
| 247 |
+
"disparity": 0.00862510233003122
|
| 248 |
+
}
|
| 249 |
+
}
|
| 250 |
+
},
|
| 251 |
+
"shipping_mode": {
|
| 252 |
+
"Market": {
|
| 253 |
+
"Africa": {
|
| 254 |
+
"n": 1721,
|
| 255 |
+
"accuracy": 0.8059267867518884
|
| 256 |
+
},
|
| 257 |
+
"Europe": {
|
| 258 |
+
"n": 7650,
|
| 259 |
+
"accuracy": 0.7586928104575164
|
| 260 |
+
},
|
| 261 |
+
"LATAM": {
|
| 262 |
+
"n": 7701,
|
| 263 |
+
"accuracy": 0.7809375405791456
|
| 264 |
+
},
|
| 265 |
+
"Pacific Asia": {
|
| 266 |
+
"n": 6143,
|
| 267 |
+
"accuracy": 0.7584242226924955
|
| 268 |
+
},
|
| 269 |
+
"USCA": {
|
| 270 |
+
"n": 3863,
|
| 271 |
+
"accuracy": 0.8193114159979291
|
| 272 |
+
},
|
| 273 |
+
"__summary__": {
|
| 274 |
+
"max_acc": 0.8193114159979291,
|
| 275 |
+
"min_acc": 0.7584242226924955,
|
| 276 |
+
"disparity": 0.06088719330543357
|
| 277 |
+
}
|
| 278 |
+
},
|
| 279 |
+
"Customer Segment": {
|
| 280 |
+
"Consumer": {
|
| 281 |
+
"n": 14008,
|
| 282 |
+
"accuracy": 0.7669902912621359
|
| 283 |
+
},
|
| 284 |
+
"Corporate": {
|
| 285 |
+
"n": 8269,
|
| 286 |
+
"accuracy": 0.7872777844963115
|
| 287 |
+
},
|
| 288 |
+
"Home Office": {
|
| 289 |
+
"n": 4801,
|
| 290 |
+
"accuracy": 0.7862945219745886
|
| 291 |
+
},
|
| 292 |
+
"__summary__": {
|
| 293 |
+
"max_acc": 0.7872777844963115,
|
| 294 |
+
"min_acc": 0.7669902912621359,
|
| 295 |
+
"disparity": 0.020287493234175558
|
| 296 |
+
}
|
| 297 |
+
}
|
| 298 |
+
},
|
| 299 |
+
"delivery_status": {
|
| 300 |
+
"Market": {
|
| 301 |
+
"Africa": {
|
| 302 |
+
"n": 1767,
|
| 303 |
+
"accuracy": 0.8687040181097906
|
| 304 |
+
},
|
| 305 |
+
"Europe": {
|
| 306 |
+
"n": 7505,
|
| 307 |
+
"accuracy": 0.8282478347768154
|
| 308 |
+
},
|
| 309 |
+
"LATAM": {
|
| 310 |
+
"n": 7746,
|
| 311 |
+
"accuracy": 0.8502452878905241
|
| 312 |
+
},
|
| 313 |
+
"Pacific Asia": {
|
| 314 |
+
"n": 6142,
|
| 315 |
+
"accuracy": 0.8150439596222728
|
| 316 |
+
},
|
| 317 |
+
"USCA": {
|
| 318 |
+
"n": 3918,
|
| 319 |
+
"accuracy": 0.8769780500255232
|
| 320 |
+
},
|
| 321 |
+
"__summary__": {
|
| 322 |
+
"max_acc": 0.8769780500255232,
|
| 323 |
+
"min_acc": 0.8150439596222728,
|
| 324 |
+
"disparity": 0.061934090403250375
|
| 325 |
+
}
|
| 326 |
+
},
|
| 327 |
+
"Customer Segment": {
|
| 328 |
+
"Consumer": {
|
| 329 |
+
"n": 14087,
|
| 330 |
+
"accuracy": 0.8335344643998013
|
| 331 |
+
},
|
| 332 |
+
"Corporate": {
|
| 333 |
+
"n": 8197,
|
| 334 |
+
"accuracy": 0.8446992802244724
|
| 335 |
+
},
|
| 336 |
+
"Home Office": {
|
| 337 |
+
"n": 4794,
|
| 338 |
+
"accuracy": 0.8579474342928661
|
| 339 |
+
},
|
| 340 |
+
"__summary__": {
|
| 341 |
+
"max_acc": 0.8579474342928661,
|
| 342 |
+
"min_acc": 0.8335344643998013,
|
| 343 |
+
"disparity": 0.02441296989306485
|
| 344 |
+
}
|
| 345 |
+
}
|
| 346 |
+
}
|
| 347 |
+
},
|
| 348 |
+
"calibration": {
|
| 349 |
+
"late_delivery_risk": {
|
| 350 |
+
"algo": "xgb",
|
| 351 |
+
"n_bins": 15,
|
| 352 |
+
"bin_confidence": [
|
| 353 |
+
0.047601889818906784,
|
| 354 |
+
0.10591482371091843,
|
| 355 |
+
0.1693299263715744,
|
| 356 |
+
0.23376236855983734,
|
| 357 |
+
0.2985405921936035,
|
| 358 |
+
0.365536093711853,
|
| 359 |
+
0.43266668915748596,
|
| 360 |
+
0.49862194061279297,
|
| 361 |
+
0.5664905309677124,
|
| 362 |
+
0.6322769522666931,
|
| 363 |
+
0.700205385684967,
|
| 364 |
+
0.7678216695785522,
|
| 365 |
+
0.834970235824585,
|
| 366 |
+
0.9012444019317627,
|
| 367 |
+
0.9871050715446472
|
| 368 |
+
],
|
| 369 |
+
"bin_accuracy": [
|
| 370 |
+
0.04878048780487805,
|
| 371 |
+
0.03429602888086643,
|
| 372 |
+
0.06657608695652174,
|
| 373 |
+
0.10221205186880244,
|
| 374 |
+
0.1659671880961465,
|
| 375 |
+
0.3065795613625758,
|
| 376 |
+
0.4490950226244344,
|
| 377 |
+
0.6264543784445805,
|
| 378 |
+
0.7001414427157001,
|
| 379 |
+
0.7884012539184952,
|
| 380 |
+
0.8334786399302528,
|
| 381 |
+
0.8685524126455907,
|
| 382 |
+
0.920274914089347,
|
| 383 |
+
0.9493734335839599,
|
| 384 |
+
0.9918243401074516
|
| 385 |
+
],
|
| 386 |
+
"bin_n": [
|
| 387 |
+
205,
|
| 388 |
+
1108,
|
| 389 |
+
2208,
|
| 390 |
+
2622,
|
| 391 |
+
2621,
|
| 392 |
+
2143,
|
| 393 |
+
1768,
|
| 394 |
+
1633,
|
| 395 |
+
1414,
|
| 396 |
+
1276,
|
| 397 |
+
1147,
|
| 398 |
+
1202,
|
| 399 |
+
1455,
|
| 400 |
+
1995,
|
| 401 |
+
4281
|
| 402 |
+
],
|
| 403 |
+
"ece": 0.08366547522741584,
|
| 404 |
+
"brier": 0.12393409580512378,
|
| 405 |
+
"temperature_scaling_T": 0.6172709141132063
|
| 406 |
+
},
|
| 407 |
+
"shipping_mode": {
|
| 408 |
+
"algo": "lgb",
|
| 409 |
+
"n_bins": 15,
|
| 410 |
+
"bin_confidence": [
|
| 411 |
+
0.3121110714805393,
|
| 412 |
+
0.37706821969221477,
|
| 413 |
+
0.44009373318135214,
|
| 414 |
+
0.5003264091242992,
|
| 415 |
+
0.5668423455793702,
|
| 416 |
+
0.6341087325686549,
|
| 417 |
+
0.7010409508680902,
|
| 418 |
+
0.7664726820296514,
|
| 419 |
+
0.8315982324325599,
|
| 420 |
+
0.8946591419686111,
|
| 421 |
+
0.9531121751216614
|
| 422 |
+
],
|
| 423 |
+
"bin_accuracy": [
|
| 424 |
+
0.2,
|
| 425 |
+
0.3730886850152905,
|
| 426 |
+
0.45858343337334934,
|
| 427 |
+
0.49913164293157347,
|
| 428 |
+
0.5809395065900642,
|
| 429 |
+
0.7184009406231628,
|
| 430 |
+
0.8413356080916402,
|
| 431 |
+
0.9226793467025015,
|
| 432 |
+
0.9520665199315236,
|
| 433 |
+
0.9763365468886941,
|
| 434 |
+
0.9710982658959537
|
| 435 |
+
],
|
| 436 |
+
"bin_n": [
|
| 437 |
+
15,
|
| 438 |
+
327,
|
| 439 |
+
1666,
|
| 440 |
+
2879,
|
| 441 |
+
2959,
|
| 442 |
+
3402,
|
| 443 |
+
4103,
|
| 444 |
+
4837,
|
| 445 |
+
4089,
|
| 446 |
+
2282,
|
| 447 |
+
519
|
| 448 |
+
],
|
| 449 |
+
"ece": 0.08808701528421295,
|
| 450 |
+
"brier": 0.14974528304098794,
|
| 451 |
+
"temperature_scaling_T": 0.7013679012815588
|
| 452 |
+
},
|
| 453 |
+
"delivery_status": {
|
| 454 |
+
"algo": "lgb",
|
| 455 |
+
"n_bins": 15,
|
| 456 |
+
"bin_confidence": [
|
| 457 |
+
0.31674386091217493,
|
| 458 |
+
0.3747040640569195,
|
| 459 |
+
0.4360554176701256,
|
| 460 |
+
0.49978873696550224,
|
| 461 |
+
0.5660495258460405,
|
| 462 |
+
0.6325569759747155,
|
| 463 |
+
0.6996959611938123,
|
| 464 |
+
0.7661925883072682,
|
| 465 |
+
0.8343464222875331,
|
| 466 |
+
0.9017332581703068,
|
| 467 |
+
0.9839647836453121
|
| 468 |
+
],
|
| 469 |
+
"bin_accuracy": [
|
| 470 |
+
0.2222222222222222,
|
| 471 |
+
0.3987341772151899,
|
| 472 |
+
0.5257352941176471,
|
| 473 |
+
0.6634679020516214,
|
| 474 |
+
0.8109608047173084,
|
| 475 |
+
0.8790291998483125,
|
| 476 |
+
0.9103793247186328,
|
| 477 |
+
0.9274406332453826,
|
| 478 |
+
0.9517241379310345,
|
| 479 |
+
0.9663677130044843,
|
| 480 |
+
0.9874145990650846
|
| 481 |
+
],
|
| 482 |
+
"bin_n": [
|
| 483 |
+
54,
|
| 484 |
+
948,
|
| 485 |
+
2448,
|
| 486 |
+
3022,
|
| 487 |
+
2883,
|
| 488 |
+
2637,
|
| 489 |
+
2399,
|
| 490 |
+
2274,
|
| 491 |
+
2175,
|
| 492 |
+
2676,
|
| 493 |
+
5562
|
| 494 |
+
],
|
| 495 |
+
"ece": 0.12621462481898915,
|
| 496 |
+
"brier": 0.1285071700698595,
|
| 497 |
+
"temperature_scaling_T": 0.5595696359480499
|
| 498 |
+
}
|
| 499 |
+
},
|
| 500 |
+
"reliability_plot_saved": true,
|
| 501 |
+
"elapsed_min": 1.084403399626414
|
| 502 |
}
|
FINAL_SUBMIT/receipts/R3_BIGTFT_INTEGRATION.json
CHANGED
|
@@ -1,52 +1,52 @@
|
|
| 1 |
-
{
|
| 2 |
-
"model": "Temporal Fusion Transformer",
|
| 3 |
-
"paper": "Lim et al. 2021 \u2014 Temporal Fusion Transformers for interpretable multi-horizon time series forecasting",
|
| 4 |
-
"implementation": "rl/forecasting/tft.py (v1 single-target) + rl/forecasting/train_tft_real.py (v2 multi-target)",
|
| 5 |
-
"params": {
|
| 6 |
-
"v1": 90602,
|
| 7 |
-
"v2": 513534
|
| 8 |
-
},
|
| 9 |
-
"checkpoints": {
|
| 10 |
-
"v1_real": {
|
| 11 |
-
"path": "rl/checkpoints/tft_real.pt",
|
| 12 |
-
"params": 90602,
|
| 13 |
-
"test_mae_usd": 7.8270111083984375,
|
| 14 |
-
"quantile_loss": 0.07062085568904877,
|
| 15 |
-
"horizon": 14,
|
| 16 |
-
"target": "DCOILWTICO"
|
| 17 |
-
},
|
| 18 |
-
"v2_multi": {
|
| 19 |
-
"path": "rl/checkpoints/tft_v2.pt",
|
| 20 |
-
"params": 513534,
|
| 21 |
-
"test_mae_p50": {
|
| 22 |
-
"DCOILWTICO": 52.868377685546875,
|
| 23 |
-
"PCOPPUSDM": 2165.05419921875,
|
| 24 |
-
"PPICMM": 127.1404800415039
|
| 25 |
-
},
|
| 26 |
-
"best_val_qloss": 0.024498114362359047,
|
| 27 |
-
"n_rolling_folds": 10
|
| 28 |
-
}
|
| 29 |
-
},
|
| 30 |
-
"integration_in_r3_past_self": {
|
| 31 |
-
"target": "DCOILWTICO",
|
| 32 |
-
"horizon": 14,
|
| 33 |
-
"r3_forecasters": {
|
| 34 |
-
"chronos_bolt": {
|
| 35 |
-
"mean_mae": 3.4998963623046877
|
| 36 |
-
},
|
| 37 |
-
"timesfm_2": {
|
| 38 |
-
"mean_mae": 3.4601973173958918
|
| 39 |
-
},
|
| 40 |
-
"arima": {
|
| 41 |
-
"mean_mae": 3.37419745103306
|
| 42 |
-
},
|
| 43 |
-
"prophet": {
|
| 44 |
-
"mean_mae": 9.348899015962079
|
| 45 |
-
}
|
| 46 |
-
},
|
| 47 |
-
"v1_tft_WTI_test_mae_usd": 7.8270111083984375,
|
| 48 |
-
"v2_tft_multi_DCOILWTICO_test_mae": 52.868377685546875,
|
| 49 |
-
"note": "TFT v1 MAE of $7.83 on single-target WTI is competitive with R3 Chronos/ARIMA values on the same series at 14-day horizon. v2 multi-target TFT numbers are higher because of multi-target sharing and scale difference (USD vs. FX cents); for a fair apples-to-apples position in R3, the v1 single-target checkpoint is used."
|
| 50 |
-
},
|
| 51 |
-
"scoped_next_step_r3_v4": "A full re-training of BigTFT on all 8 FRED targets with the R3 20-fold rolling-origin backtest would require porting to pytorch-forecasting's TimeSeriesDataSet. Scoped as follow-up; v1 checkpoint numbers are the current representative point-of-reference for BigTFT in this release."
|
| 52 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "Temporal Fusion Transformer",
|
| 3 |
+
"paper": "Lim et al. 2021 \u2014 Temporal Fusion Transformers for interpretable multi-horizon time series forecasting",
|
| 4 |
+
"implementation": "rl/forecasting/tft.py (v1 single-target) + rl/forecasting/train_tft_real.py (v2 multi-target)",
|
| 5 |
+
"params": {
|
| 6 |
+
"v1": 90602,
|
| 7 |
+
"v2": 513534
|
| 8 |
+
},
|
| 9 |
+
"checkpoints": {
|
| 10 |
+
"v1_real": {
|
| 11 |
+
"path": "rl/checkpoints/tft_real.pt",
|
| 12 |
+
"params": 90602,
|
| 13 |
+
"test_mae_usd": 7.8270111083984375,
|
| 14 |
+
"quantile_loss": 0.07062085568904877,
|
| 15 |
+
"horizon": 14,
|
| 16 |
+
"target": "DCOILWTICO"
|
| 17 |
+
},
|
| 18 |
+
"v2_multi": {
|
| 19 |
+
"path": "rl/checkpoints/tft_v2.pt",
|
| 20 |
+
"params": 513534,
|
| 21 |
+
"test_mae_p50": {
|
| 22 |
+
"DCOILWTICO": 52.868377685546875,
|
| 23 |
+
"PCOPPUSDM": 2165.05419921875,
|
| 24 |
+
"PPICMM": 127.1404800415039
|
| 25 |
+
},
|
| 26 |
+
"best_val_qloss": 0.024498114362359047,
|
| 27 |
+
"n_rolling_folds": 10
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"integration_in_r3_past_self": {
|
| 31 |
+
"target": "DCOILWTICO",
|
| 32 |
+
"horizon": 14,
|
| 33 |
+
"r3_forecasters": {
|
| 34 |
+
"chronos_bolt": {
|
| 35 |
+
"mean_mae": 3.4998963623046877
|
| 36 |
+
},
|
| 37 |
+
"timesfm_2": {
|
| 38 |
+
"mean_mae": 3.4601973173958918
|
| 39 |
+
},
|
| 40 |
+
"arima": {
|
| 41 |
+
"mean_mae": 3.37419745103306
|
| 42 |
+
},
|
| 43 |
+
"prophet": {
|
| 44 |
+
"mean_mae": 9.348899015962079
|
| 45 |
+
}
|
| 46 |
+
},
|
| 47 |
+
"v1_tft_WTI_test_mae_usd": 7.8270111083984375,
|
| 48 |
+
"v2_tft_multi_DCOILWTICO_test_mae": 52.868377685546875,
|
| 49 |
+
"note": "TFT v1 MAE of $7.83 on single-target WTI is competitive with R3 Chronos/ARIMA values on the same series at 14-day horizon. v2 multi-target TFT numbers are higher because of multi-target sharing and scale difference (USD vs. FX cents); for a fair apples-to-apples position in R3, the v1 single-target checkpoint is used."
|
| 50 |
+
},
|
| 51 |
+
"scoped_next_step_r3_v4": "A full re-training of BigTFT on all 8 FRED targets with the R3 20-fold rolling-origin backtest would require porting to pytorch-forecasting's TimeSeriesDataSet. Scoped as follow-up; v1 checkpoint numbers are the current representative point-of-reference for BigTFT in this release."
|
| 52 |
}
|
FINAL_SUBMIT/receipts/R3_PAST_SELF.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
FINAL_SUBMIT/receipts/R3_STACKING_V2.json
CHANGED
|
@@ -1,1188 +1,1188 @@
|
|
| 1 |
-
{
|
| 2 |
-
"description": "Constrained-stacking comparison. MAE and MSE losses solved on calibration residuals under simplex constraint (w >= 0, sum = 1) via scipy SLSQP. Tested on held-out folds. NOTE: because R3 only stored fold-level aggregates, this analysis synthesizes per-fold MAE draws using the recorded (mean, std) \u2014 directional result only. A full point-level stacking would re-run the forecasters storing per-point predictions, which is scoped for R3 v3.",
|
| 3 |
-
"targets_analyzed": 21,
|
| 4 |
-
"winner_counts": {
|
| 5 |
-
"constrained (MAE or MSE)": 9,
|
| 6 |
-
"equal_weights": 2,
|
| 7 |
-
"best_individual": 10
|
| 8 |
-
},
|
| 9 |
-
"per_target_horizon": {
|
| 10 |
-
"DCOILWTICO_7": {
|
| 11 |
-
"n_cal_folds": 10,
|
| 12 |
-
"n_test_folds": 10,
|
| 13 |
-
"models": [
|
| 14 |
-
"chronos",
|
| 15 |
-
"timesfm",
|
| 16 |
-
"arima",
|
| 17 |
-
"prophet"
|
| 18 |
-
],
|
| 19 |
-
"weights": {
|
| 20 |
-
"equal": {
|
| 21 |
-
"w": [
|
| 22 |
-
0.25,
|
| 23 |
-
0.25,
|
| 24 |
-
0.25,
|
| 25 |
-
0.25
|
| 26 |
-
],
|
| 27 |
-
"test_mae": 4.078327693241436
|
| 28 |
-
},
|
| 29 |
-
"inverse_mae": {
|
| 30 |
-
"w": [
|
| 31 |
-
0.3473502883901263,
|
| 32 |
-
0.2560874881405812,
|
| 33 |
-
0.3115195598071785,
|
| 34 |
-
0.08504266366211403
|
| 35 |
-
],
|
| 36 |
-
"test_mae": 3.3276628679064912
|
| 37 |
-
},
|
| 38 |
-
"constrained_mae": {
|
| 39 |
-
"w": [
|
| 40 |
-
0.9999999999996985,
|
| 41 |
-
1.046385200709126e-13,
|
| 42 |
-
0.0,
|
| 43 |
-
1.9696744235629476e-13
|
| 44 |
-
],
|
| 45 |
-
"test_mae": 2.653996344639796
|
| 46 |
-
},
|
| 47 |
-
"constrained_mse": {
|
| 48 |
-
"w": [
|
| 49 |
-
0.71816178869903,
|
| 50 |
-
6.540164218966743e-14,
|
| 51 |
-
0.2818382113009046,
|
| 52 |
-
0.0
|
| 53 |
-
],
|
| 54 |
-
"test_mae": 2.8532434560990985
|
| 55 |
-
}
|
| 56 |
-
},
|
| 57 |
-
"best_individual_on_cal": {
|
| 58 |
-
"model": "chronos",
|
| 59 |
-
"test_mae": 2.6539963446388284
|
| 60 |
-
},
|
| 61 |
-
"winner": {
|
| 62 |
-
"method": "best_individual",
|
| 63 |
-
"test_mae": 2.6539963446388284
|
| 64 |
-
}
|
| 65 |
-
},
|
| 66 |
-
"DCOILWTICO_14": {
|
| 67 |
-
"n_cal_folds": 10,
|
| 68 |
-
"n_test_folds": 10,
|
| 69 |
-
"models": [
|
| 70 |
-
"chronos",
|
| 71 |
-
"timesfm",
|
| 72 |
-
"arima",
|
| 73 |
-
"prophet"
|
| 74 |
-
],
|
| 75 |
-
"weights": {
|
| 76 |
-
"equal": {
|
| 77 |
-
"w": [
|
| 78 |
-
0.25,
|
| 79 |
-
0.25,
|
| 80 |
-
0.25,
|
| 81 |
-
0.25
|
| 82 |
-
],
|
| 83 |
-
"test_mae": 5.612792583388805
|
| 84 |
-
},
|
| 85 |
-
"inverse_mae": {
|
| 86 |
-
"w": [
|
| 87 |
-
0.28213323004306484,
|
| 88 |
-
0.22633132223221528,
|
| 89 |
-
0.4020856514147427,
|
| 90 |
-
0.0894497963099773
|
| 91 |
-
],
|
| 92 |
-
"test_mae": 3.9445735906379418
|
| 93 |
-
},
|
| 94 |
-
"constrained_mae": {
|
| 95 |
-
"w": [
|
| 96 |
-
0.0,
|
| 97 |
-
5.025493909904784e-15,
|
| 98 |
-
0.9999999999999949,
|
| 99 |
-
0.0
|
| 100 |
-
],
|
| 101 |
-
"test_mae": 2.606399976137096
|
| 102 |
-
},
|
| 103 |
-
"constrained_mse": {
|
| 104 |
-
"w": [
|
| 105 |
-
0.21952231081723392,
|
| 106 |
-
0.0,
|
| 107 |
-
0.7804776891824843,
|
| 108 |
-
2.8179414894790747e-13
|
| 109 |
-
],
|
| 110 |
-
"test_mae": 2.6333455113190545
|
| 111 |
-
}
|
| 112 |
-
},
|
| 113 |
-
"best_individual_on_cal": {
|
| 114 |
-
"model": "arima",
|
| 115 |
-
"test_mae": 2.6063999761370877
|
| 116 |
-
},
|
| 117 |
-
"winner": {
|
| 118 |
-
"method": "best_individual",
|
| 119 |
-
"test_mae": 2.6063999761370877
|
| 120 |
-
}
|
| 121 |
-
},
|
| 122 |
-
"DCOILWTICO_28": {
|
| 123 |
-
"n_cal_folds": 10,
|
| 124 |
-
"n_test_folds": 10,
|
| 125 |
-
"models": [
|
| 126 |
-
"chronos",
|
| 127 |
-
"timesfm",
|
| 128 |
-
"arima",
|
| 129 |
-
"prophet"
|
| 130 |
-
],
|
| 131 |
-
"weights": {
|
| 132 |
-
"equal": {
|
| 133 |
-
"w": [
|
| 134 |
-
0.25,
|
| 135 |
-
0.25,
|
| 136 |
-
0.25,
|
| 137 |
-
0.25
|
| 138 |
-
],
|
| 139 |
-
"test_mae": 7.224652873063855
|
| 140 |
-
},
|
| 141 |
-
"inverse_mae": {
|
| 142 |
-
"w": [
|
| 143 |
-
0.23850653345434814,
|
| 144 |
-
0.3008301142852576,
|
| 145 |
-
0.32149310365193035,
|
| 146 |
-
0.13917024860846383
|
| 147 |
-
],
|
| 148 |
-
"test_mae": 6.73982107186095
|
| 149 |
-
},
|
| 150 |
-
"constrained_mae": {
|
| 151 |
-
"w": [
|
| 152 |
-
1.4923057986615315e-14,
|
| 153 |
-
0.0,
|
| 154 |
-
0.9999999999999623,
|
| 155 |
-
2.2904834182010197e-14
|
| 156 |
-
],
|
| 157 |
-
"test_mae": 5.30872788303258
|
| 158 |
-
},
|
| 159 |
-
"constrained_mse": {
|
| 160 |
-
"w": [
|
| 161 |
-
0.0,
|
| 162 |
-
0.5605029591213022,
|
| 163 |
-
0.4394970408771834,
|
| 164 |
-
1.5144498461763077e-12
|
| 165 |
-
],
|
| 166 |
-
"test_mae": 6.268328694014642
|
| 167 |
-
}
|
| 168 |
-
},
|
| 169 |
-
"best_individual_on_cal": {
|
| 170 |
-
"model": "arima",
|
| 171 |
-
"test_mae": 5.308727883032449
|
| 172 |
-
},
|
| 173 |
-
"winner": {
|
| 174 |
-
"method": "best_individual",
|
| 175 |
-
"test_mae": 5.308727883032449
|
| 176 |
-
}
|
| 177 |
-
},
|
| 178 |
-
"PCOPPUSDM_7": {
|
| 179 |
-
"n_cal_folds": 3,
|
| 180 |
-
"n_test_folds": 3,
|
| 181 |
-
"models": [
|
| 182 |
-
"chronos",
|
| 183 |
-
"timesfm",
|
| 184 |
-
"arima",
|
| 185 |
-
"prophet"
|
| 186 |
-
],
|
| 187 |
-
"weights": {
|
| 188 |
-
"equal": {
|
| 189 |
-
"w": [
|
| 190 |
-
0.25,
|
| 191 |
-
0.25,
|
| 192 |
-
0.25,
|
| 193 |
-
0.25
|
| 194 |
-
],
|
| 195 |
-
"test_mae": 1490.0940767617776
|
| 196 |
-
},
|
| 197 |
-
"inverse_mae": {
|
| 198 |
-
"w": [
|
| 199 |
-
0.27104333378246154,
|
| 200 |
-
0.17597353969029747,
|
| 201 |
-
0.2509767796737437,
|
| 202 |
-
0.30200634685349736
|
| 203 |
-
],
|
| 204 |
-
"test_mae": 1510.2305023002107
|
| 205 |
-
},
|
| 206 |
-
"constrained_mae": {
|
| 207 |
-
"w": [
|
| 208 |
-
0.0,
|
| 209 |
-
0.0,
|
| 210 |
-
0.0,
|
| 211 |
-
1.0
|
| 212 |
-
],
|
| 213 |
-
"test_mae": 2368.6000030761893
|
| 214 |
-
},
|
| 215 |
-
"constrained_mse": {
|
| 216 |
-
"w": [
|
| 217 |
-
0.25,
|
| 218 |
-
0.25,
|
| 219 |
-
0.25,
|
| 220 |
-
0.25
|
| 221 |
-
],
|
| 222 |
-
"test_mae": 1490.0940767617776
|
| 223 |
-
}
|
| 224 |
-
},
|
| 225 |
-
"best_individual_on_cal": {
|
| 226 |
-
"model": "prophet",
|
| 227 |
-
"test_mae": 2368.6000030761893
|
| 228 |
-
},
|
| 229 |
-
"winner": {
|
| 230 |
-
"method": "equal",
|
| 231 |
-
"test_mae": 1490.0940767617776
|
| 232 |
-
}
|
| 233 |
-
},
|
| 234 |
-
"PCOPPUSDM_14": {
|
| 235 |
-
"n_cal_folds": 3,
|
| 236 |
-
"n_test_folds": 3,
|
| 237 |
-
"models": [
|
| 238 |
-
"chronos",
|
| 239 |
-
"timesfm",
|
| 240 |
-
"arima",
|
| 241 |
-
"prophet"
|
| 242 |
-
],
|
| 243 |
-
"weights": {
|
| 244 |
-
"equal": {
|
| 245 |
-
"w": [
|
| 246 |
-
0.25,
|
| 247 |
-
0.25,
|
| 248 |
-
0.25,
|
| 249 |
-
0.25
|
| 250 |
-
],
|
| 251 |
-
"test_mae": 1322.8195925914633
|
| 252 |
-
},
|
| 253 |
-
"inverse_mae": {
|
| 254 |
-
"w": [
|
| 255 |
-
0.39909529037167984,
|
| 256 |
-
0.15858707123054439,
|
| 257 |
-
0.28187978431797855,
|
| 258 |
-
0.1604378540797973
|
| 259 |
-
],
|
| 260 |
-
"test_mae": 1149.0099023538414
|
| 261 |
-
},
|
| 262 |
-
"constrained_mae": {
|
| 263 |
-
"w": [
|
| 264 |
-
1.0,
|
| 265 |
-
0.0,
|
| 266 |
-
0.0,
|
| 267 |
-
0.0
|
| 268 |
-
],
|
| 269 |
-
"test_mae": 835.4762629006885
|
| 270 |
-
},
|
| 271 |
-
"constrained_mse": {
|
| 272 |
-
"w": [
|
| 273 |
-
0.25,
|
| 274 |
-
0.25,
|
| 275 |
-
0.25,
|
| 276 |
-
0.25
|
| 277 |
-
],
|
| 278 |
-
"test_mae": 1322.8195925914633
|
| 279 |
-
}
|
| 280 |
-
},
|
| 281 |
-
"best_individual_on_cal": {
|
| 282 |
-
"model": "chronos",
|
| 283 |
-
"test_mae": 835.4762629006885
|
| 284 |
-
},
|
| 285 |
-
"winner": {
|
| 286 |
-
"method": "constrained_mae",
|
| 287 |
-
"test_mae": 835.4762629006885
|
| 288 |
-
}
|
| 289 |
-
},
|
| 290 |
-
"PCOPPUSDM_28": {
|
| 291 |
-
"n_cal_folds": 3,
|
| 292 |
-
"n_test_folds": 3,
|
| 293 |
-
"models": [
|
| 294 |
-
"chronos",
|
| 295 |
-
"timesfm",
|
| 296 |
-
"arima",
|
| 297 |
-
"prophet"
|
| 298 |
-
],
|
| 299 |
-
"weights": {
|
| 300 |
-
"equal": {
|
| 301 |
-
"w": [
|
| 302 |
-
0.25,
|
| 303 |
-
0.25,
|
| 304 |
-
0.25,
|
| 305 |
-
0.25
|
| 306 |
-
],
|
| 307 |
-
"test_mae": 968.7983373413057
|
| 308 |
-
},
|
| 309 |
-
"inverse_mae": {
|
| 310 |
-
"w": [
|
| 311 |
-
0.24317295792125612,
|
| 312 |
-
0.28640862860805355,
|
| 313 |
-
0.1904195773780233,
|
| 314 |
-
0.2799988360926669
|
| 315 |
-
],
|
| 316 |
-
"test_mae": 988.2430854488761
|
| 317 |
-
},
|
| 318 |
-
"constrained_mae": {
|
| 319 |
-
"w": [
|
| 320 |
-
0.0,
|
| 321 |
-
1.0,
|
| 322 |
-
0.0,
|
| 323 |
-
0.0
|
| 324 |
-
],
|
| 325 |
-
"test_mae": 1383.8323251118418
|
| 326 |
-
},
|
| 327 |
-
"constrained_mse": {
|
| 328 |
-
"w": [
|
| 329 |
-
0.25,
|
| 330 |
-
0.25,
|
| 331 |
-
0.25,
|
| 332 |
-
0.25
|
| 333 |
-
],
|
| 334 |
-
"test_mae": 968.7983373413057
|
| 335 |
-
}
|
| 336 |
-
},
|
| 337 |
-
"best_individual_on_cal": {
|
| 338 |
-
"model": "timesfm",
|
| 339 |
-
"test_mae": 1383.8323251118418
|
| 340 |
-
},
|
| 341 |
-
"winner": {
|
| 342 |
-
"method": "equal",
|
| 343 |
-
"test_mae": 968.7983373413057
|
| 344 |
-
}
|
| 345 |
-
},
|
| 346 |
-
"DEXTAUS_7": {
|
| 347 |
-
"n_cal_folds": 10,
|
| 348 |
-
"n_test_folds": 10,
|
| 349 |
-
"models": [
|
| 350 |
-
"chronos",
|
| 351 |
-
"timesfm",
|
| 352 |
-
"arima",
|
| 353 |
-
"prophet"
|
| 354 |
-
],
|
| 355 |
-
"weights": {
|
| 356 |
-
"equal": {
|
| 357 |
-
"w": [
|
| 358 |
-
0.25,
|
| 359 |
-
0.25,
|
| 360 |
-
0.25,
|
| 361 |
-
0.25
|
| 362 |
-
],
|
| 363 |
-
"test_mae": 0.2169347525199409
|
| 364 |
-
},
|
| 365 |
-
"inverse_mae": {
|
| 366 |
-
"w": [
|
| 367 |
-
0.34398899758591117,
|
| 368 |
-
0.2030939191106745,
|
| 369 |
-
0.3764283233385005,
|
| 370 |
-
0.07648875996491374
|
| 371 |
-
],
|
| 372 |
-
"test_mae": 0.1658846094174201
|
| 373 |
-
},
|
| 374 |
-
"constrained_mae": {
|
| 375 |
-
"w": [
|
| 376 |
-
0.0,
|
| 377 |
-
7.008282842946293e-16,
|
| 378 |
-
0.9999999999999989,
|
| 379 |
-
4.579669976578766e-16
|
| 380 |
-
],
|
| 381 |
-
"test_mae": 0.12304418839562406
|
| 382 |
-
},
|
| 383 |
-
"constrained_mse": {
|
| 384 |
-
"w": [
|
| 385 |
-
0.3806257863168961,
|
| 386 |
-
8.153200337090993e-17,
|
| 387 |
-
0.619374213683104,
|
| 388 |
-
0.0
|
| 389 |
-
],
|
| 390 |
-
"test_mae": 0.12205338531046768
|
| 391 |
-
}
|
| 392 |
-
},
|
| 393 |
-
"best_individual_on_cal": {
|
| 394 |
-
"model": "arima",
|
| 395 |
-
"test_mae": 0.12304418839562384
|
| 396 |
-
},
|
| 397 |
-
"winner": {
|
| 398 |
-
"method": "constrained_mse",
|
| 399 |
-
"test_mae": 0.12205338531046768
|
| 400 |
-
}
|
| 401 |
-
},
|
| 402 |
-
"DEXTAUS_14": {
|
| 403 |
-
"n_cal_folds": 10,
|
| 404 |
-
"n_test_folds": 10,
|
| 405 |
-
"models": [
|
| 406 |
-
"chronos",
|
| 407 |
-
"timesfm",
|
| 408 |
-
"arima",
|
| 409 |
-
"prophet"
|
| 410 |
-
],
|
| 411 |
-
"weights": {
|
| 412 |
-
"equal": {
|
| 413 |
-
"w": [
|
| 414 |
-
0.25,
|
| 415 |
-
0.25,
|
| 416 |
-
0.25,
|
| 417 |
-
0.25
|
| 418 |
-
],
|
| 419 |
-
"test_mae": 0.2936029051307666
|
| 420 |
-
},
|
| 421 |
-
"inverse_mae": {
|
| 422 |
-
"w": [
|
| 423 |
-
0.3024605314294574,
|
| 424 |
-
0.20677440280922138,
|
| 425 |
-
0.3973126914677932,
|
| 426 |
-
0.09345237429352793
|
| 427 |
-
],
|
| 428 |
-
"test_mae": 0.24062725397849288
|
| 429 |
-
},
|
| 430 |
-
"constrained_mae": {
|
| 431 |
-
"w": [
|
| 432 |
-
0.0,
|
| 433 |
-
0.0,
|
| 434 |
-
1.0,
|
| 435 |
-
0.0
|
| 436 |
-
],
|
| 437 |
-
"test_mae": 0.2075701838535929
|
| 438 |
-
},
|
| 439 |
-
"constrained_mse": {
|
| 440 |
-
"w": [
|
| 441 |
-
0.20409965483488535,
|
| 442 |
-
1.196959198423997e-16,
|
| 443 |
-
0.7959003451651147,
|
| 444 |
-
0.0
|
| 445 |
-
],
|
| 446 |
-
"test_mae": 0.20767726865065442
|
| 447 |
-
}
|
| 448 |
-
},
|
| 449 |
-
"best_individual_on_cal": {
|
| 450 |
-
"model": "arima",
|
| 451 |
-
"test_mae": 0.2075701838535929
|
| 452 |
-
},
|
| 453 |
-
"winner": {
|
| 454 |
-
"method": "constrained_mae",
|
| 455 |
-
"test_mae": 0.2075701838535929
|
| 456 |
-
}
|
| 457 |
-
},
|
| 458 |
-
"DEXTAUS_28": {
|
| 459 |
-
"n_cal_folds": 10,
|
| 460 |
-
"n_test_folds": 10,
|
| 461 |
-
"models": [
|
| 462 |
-
"chronos",
|
| 463 |
-
"timesfm",
|
| 464 |
-
"arima",
|
| 465 |
-
"prophet"
|
| 466 |
-
],
|
| 467 |
-
"weights": {
|
| 468 |
-
"equal": {
|
| 469 |
-
"w": [
|
| 470 |
-
0.25,
|
| 471 |
-
0.25,
|
| 472 |
-
0.25,
|
| 473 |
-
0.25
|
| 474 |
-
],
|
| 475 |
-
"test_mae": 0.35161458970616255
|
| 476 |
-
},
|
| 477 |
-
"inverse_mae": {
|
| 478 |
-
"w": [
|
| 479 |
-
0.31779598685220195,
|
| 480 |
-
0.27176079256586594,
|
| 481 |
-
0.28189025800444834,
|
| 482 |
-
0.12855296257748378
|
| 483 |
-
],
|
| 484 |
-
"test_mae": 0.3189607034469092
|
| 485 |
-
},
|
| 486 |
-
"constrained_mae": {
|
| 487 |
-
"w": [
|
| 488 |
-
0.9999999999999998,
|
| 489 |
-
0.0,
|
| 490 |
-
0.0,
|
| 491 |
-
3.1918911957973246e-16
|
| 492 |
-
],
|
| 493 |
-
"test_mae": 0.289064216740161
|
| 494 |
-
},
|
| 495 |
-
"constrained_mse": {
|
| 496 |
-
"w": [
|
| 497 |
-
0.45663759735298354,
|
| 498 |
-
0.10339949724699603,
|
| 499 |
-
0.4399629054000205,
|
| 500 |
-
0.0
|
| 501 |
-
],
|
| 502 |
-
"test_mae": 0.27882969196380114
|
| 503 |
-
}
|
| 504 |
-
},
|
| 505 |
-
"best_individual_on_cal": {
|
| 506 |
-
"model": "chronos",
|
| 507 |
-
"test_mae": 0.2890642167401609
|
| 508 |
-
},
|
| 509 |
-
"winner": {
|
| 510 |
-
"method": "constrained_mse",
|
| 511 |
-
"test_mae": 0.27882969196380114
|
| 512 |
-
}
|
| 513 |
-
},
|
| 514 |
-
"DEXKOUS_7": {
|
| 515 |
-
"n_cal_folds": 10,
|
| 516 |
-
"n_test_folds": 10,
|
| 517 |
-
"models": [
|
| 518 |
-
"chronos",
|
| 519 |
-
"timesfm",
|
| 520 |
-
"arima",
|
| 521 |
-
"prophet"
|
| 522 |
-
],
|
| 523 |
-
"weights": {
|
| 524 |
-
"equal": {
|
| 525 |
-
"w": [
|
| 526 |
-
0.25,
|
| 527 |
-
0.25,
|
| 528 |
-
0.25,
|
| 529 |
-
0.25
|
| 530 |
-
],
|
| 531 |
-
"test_mae": 17.2493699999521
|
| 532 |
-
},
|
| 533 |
-
"inverse_mae": {
|
| 534 |
-
"w": [
|
| 535 |
-
0.22521754050965248,
|
| 536 |
-
0.2661802247036112,
|
| 537 |
-
0.3761094665932334,
|
| 538 |
-
0.1324927681935029
|
| 539 |
-
],
|
| 540 |
-
"test_mae": 15.47479328474102
|
| 541 |
-
},
|
| 542 |
-
"constrained_mae": {
|
| 543 |
-
"w": [
|
| 544 |
-
0.0,
|
| 545 |
-
2.7089441800853084e-14,
|
| 546 |
-
0.9999999999999729,
|
| 547 |
-
0.0
|
| 548 |
-
],
|
| 549 |
-
"test_mae": 14.0900150189361
|
| 550 |
-
},
|
| 551 |
-
"constrained_mse": {
|
| 552 |
-
"w": [
|
| 553 |
-
1.4068121662922204e-19,
|
| 554 |
-
0.19202529383105713,
|
| 555 |
-
0.8079747057066696,
|
| 556 |
-
4.6227315218243986e-10
|
| 557 |
-
],
|
| 558 |
-
"test_mae": 14.093086604275276
|
| 559 |
-
}
|
| 560 |
-
},
|
| 561 |
-
"best_individual_on_cal": {
|
| 562 |
-
"model": "arima",
|
| 563 |
-
"test_mae": 14.0900150189361
|
| 564 |
-
},
|
| 565 |
-
"winner": {
|
| 566 |
-
"method": "constrained_mae",
|
| 567 |
-
"test_mae": 14.0900150189361
|
| 568 |
-
}
|
| 569 |
-
},
|
| 570 |
-
"DEXKOUS_14": {
|
| 571 |
-
"n_cal_folds": 10,
|
| 572 |
-
"n_test_folds": 10,
|
| 573 |
-
"models": [
|
| 574 |
-
"chronos",
|
| 575 |
-
"timesfm",
|
| 576 |
-
"arima",
|
| 577 |
-
"prophet"
|
| 578 |
-
],
|
| 579 |
-
"weights": {
|
| 580 |
-
"equal": {
|
| 581 |
-
"w": [
|
| 582 |
-
0.25,
|
| 583 |
-
0.25,
|
| 584 |
-
0.25,
|
| 585 |
-
0.25
|
| 586 |
-
],
|
| 587 |
-
"test_mae": 19.357951817590667
|
| 588 |
-
},
|
| 589 |
-
"inverse_mae": {
|
| 590 |
-
"w": [
|
| 591 |
-
0.3500118447028979,
|
| 592 |
-
0.25958141131048756,
|
| 593 |
-
0.2744350765852677,
|
| 594 |
-
0.11597166740134691
|
| 595 |
-
],
|
| 596 |
-
"test_mae": 17.40246559654232
|
| 597 |
-
},
|
| 598 |
-
"constrained_mae": {
|
| 599 |
-
"w": [
|
| 600 |
-
0.9999999999992815,
|
| 601 |
-
3.2990277176712823e-13,
|
| 602 |
-
3.88689080920988e-13,
|
| 603 |
-
0.0
|
| 604 |
-
],
|
| 605 |
-
"test_mae": 13.478487470042296
|
| 606 |
-
},
|
| 607 |
-
"constrained_mse": {
|
| 608 |
-
"w": [
|
| 609 |
-
0.999999999787164,
|
| 610 |
-
0.0,
|
| 611 |
-
0.0,
|
| 612 |
-
2.1283591823683064e-10
|
| 613 |
-
],
|
| 614 |
-
"test_mae": 13.478487473311748
|
| 615 |
-
}
|
| 616 |
-
},
|
| 617 |
-
"best_individual_on_cal": {
|
| 618 |
-
"model": "chronos",
|
| 619 |
-
"test_mae": 13.4784874700395
|
| 620 |
-
},
|
| 621 |
-
"winner": {
|
| 622 |
-
"method": "best_individual",
|
| 623 |
-
"test_mae": 13.4784874700395
|
| 624 |
-
}
|
| 625 |
-
},
|
| 626 |
-
"DEXKOUS_28": {
|
| 627 |
-
"n_cal_folds": 10,
|
| 628 |
-
"n_test_folds": 10,
|
| 629 |
-
"models": [
|
| 630 |
-
"chronos",
|
| 631 |
-
"timesfm",
|
| 632 |
-
"arima",
|
| 633 |
-
"prophet"
|
| 634 |
-
],
|
| 635 |
-
"weights": {
|
| 636 |
-
"equal": {
|
| 637 |
-
"w": [
|
| 638 |
-
0.25,
|
| 639 |
-
0.25,
|
| 640 |
-
0.25,
|
| 641 |
-
0.25
|
| 642 |
-
],
|
| 643 |
-
"test_mae": 24.8683981319863
|
| 644 |
-
},
|
| 645 |
-
"inverse_mae": {
|
| 646 |
-
"w": [
|
| 647 |
-
0.15714338435667446,
|
| 648 |
-
0.3032008336686258,
|
| 649 |
-
0.3174445784155295,
|
| 650 |
-
0.22221120355917026
|
| 651 |
-
],
|
| 652 |
-
"test_mae": 23.767772135429315
|
| 653 |
-
},
|
| 654 |
-
"constrained_mae": {
|
| 655 |
-
"w": [
|
| 656 |
-
0.0,
|
| 657 |
-
0.0,
|
| 658 |
-
1.0,
|
| 659 |
-
0.0
|
| 660 |
-
],
|
| 661 |
-
"test_mae": 13.038534452266783
|
| 662 |
-
},
|
| 663 |
-
"constrained_mse": {
|
| 664 |
-
"w": [
|
| 665 |
-
0.0,
|
| 666 |
-
1.6482941097956984e-10,
|
| 667 |
-
0.9999999997453165,
|
| 668 |
-
8.9854093955618e-11
|
| 669 |
-
],
|
| 670 |
-
"test_mae": 13.038534456323145
|
| 671 |
-
}
|
| 672 |
-
},
|
| 673 |
-
"best_individual_on_cal": {
|
| 674 |
-
"model": "arima",
|
| 675 |
-
"test_mae": 13.038534452266783
|
| 676 |
-
},
|
| 677 |
-
"winner": {
|
| 678 |
-
"method": "constrained_mae",
|
| 679 |
-
"test_mae": 13.038534452266783
|
| 680 |
-
}
|
| 681 |
-
},
|
| 682 |
-
"DEXJPUS_7": {
|
| 683 |
-
"n_cal_folds": 10,
|
| 684 |
-
"n_test_folds": 10,
|
| 685 |
-
"models": [
|
| 686 |
-
"chronos",
|
| 687 |
-
"timesfm",
|
| 688 |
-
"arima",
|
| 689 |
-
"prophet"
|
| 690 |
-
],
|
| 691 |
-
"weights": {
|
| 692 |
-
"equal": {
|
| 693 |
-
"w": [
|
| 694 |
-
0.25,
|
| 695 |
-
0.25,
|
| 696 |
-
0.25,
|
| 697 |
-
0.25
|
| 698 |
-
],
|
| 699 |
-
"test_mae": 2.0058613373406016
|
| 700 |
-
},
|
| 701 |
-
"inverse_mae": {
|
| 702 |
-
"w": [
|
| 703 |
-
0.3311569291093271,
|
| 704 |
-
0.21966516526756977,
|
| 705 |
-
0.27781607384114676,
|
| 706 |
-
0.17136183178195635
|
| 707 |
-
],
|
| 708 |
-
"test_mae": 1.7598609660764388
|
| 709 |
-
},
|
| 710 |
-
"constrained_mae": {
|
| 711 |
-
"w": [
|
| 712 |
-
0.9999999999999993,
|
| 713 |
-
0.0,
|
| 714 |
-
0.0,
|
| 715 |
-
7.14706072102444e-16
|
| 716 |
-
],
|
| 717 |
-
"test_mae": 0.9624409634715991
|
| 718 |
-
},
|
| 719 |
-
"constrained_mse": {
|
| 720 |
-
"w": [
|
| 721 |
-
0.637656517780962,
|
| 722 |
-
0.0,
|
| 723 |
-
0.36234348221903795,
|
| 724 |
-
2.0816681711721676e-17
|
| 725 |
-
],
|
| 726 |
-
"test_mae": 1.1158006833860175
|
| 727 |
-
}
|
| 728 |
-
},
|
| 729 |
-
"best_individual_on_cal": {
|
| 730 |
-
"model": "chronos",
|
| 731 |
-
"test_mae": 0.962440963471597
|
| 732 |
-
},
|
| 733 |
-
"winner": {
|
| 734 |
-
"method": "best_individual",
|
| 735 |
-
"test_mae": 0.962440963471597
|
| 736 |
-
}
|
| 737 |
-
},
|
| 738 |
-
"DEXJPUS_14": {
|
| 739 |
-
"n_cal_folds": 10,
|
| 740 |
-
"n_test_folds": 10,
|
| 741 |
-
"models": [
|
| 742 |
-
"chronos",
|
| 743 |
-
"timesfm",
|
| 744 |
-
"arima",
|
| 745 |
-
"prophet"
|
| 746 |
-
],
|
| 747 |
-
"weights": {
|
| 748 |
-
"equal": {
|
| 749 |
-
"w": [
|
| 750 |
-
0.25,
|
| 751 |
-
0.25,
|
| 752 |
-
0.25,
|
| 753 |
-
0.25
|
| 754 |
-
],
|
| 755 |
-
"test_mae": 2.0585639763398134
|
| 756 |
-
},
|
| 757 |
-
"inverse_mae": {
|
| 758 |
-
"w": [
|
| 759 |
-
0.29221948346213755,
|
| 760 |
-
0.30006908767689383,
|
| 761 |
-
0.3336814964148649,
|
| 762 |
-
0.07402993244610366
|
| 763 |
-
],
|
| 764 |
-
"test_mae": 1.525371337574877
|
| 765 |
-
},
|
| 766 |
-
"constrained_mae": {
|
| 767 |
-
"w": [
|
| 768 |
-
0.0,
|
| 769 |
-
0.0,
|
| 770 |
-
0.9999999999998224,
|
| 771 |
-
1.7753796613177788e-13
|
| 772 |
-
],
|
| 773 |
-
"test_mae": 0.9391751508495592
|
| 774 |
-
},
|
| 775 |
-
"constrained_mse": {
|
| 776 |
-
"w": [
|
| 777 |
-
0.0,
|
| 778 |
-
0.23909961575984545,
|
| 779 |
-
0.7609003842401545,
|
| 780 |
-
0.0
|
| 781 |
-
],
|
| 782 |
-
"test_mae": 1.1619170740566178
|
| 783 |
-
}
|
| 784 |
-
},
|
| 785 |
-
"best_individual_on_cal": {
|
| 786 |
-
"model": "arima",
|
| 787 |
-
"test_mae": 0.9391751508489655
|
| 788 |
-
},
|
| 789 |
-
"winner": {
|
| 790 |
-
"method": "best_individual",
|
| 791 |
-
"test_mae": 0.9391751508489655
|
| 792 |
-
}
|
| 793 |
-
},
|
| 794 |
-
"DEXJPUS_28": {
|
| 795 |
-
"n_cal_folds": 10,
|
| 796 |
-
"n_test_folds": 10,
|
| 797 |
-
"models": [
|
| 798 |
-
"chronos",
|
| 799 |
-
"timesfm",
|
| 800 |
-
"arima",
|
| 801 |
-
"prophet"
|
| 802 |
-
],
|
| 803 |
-
"weights": {
|
| 804 |
-
"equal": {
|
| 805 |
-
"w": [
|
| 806 |
-
0.25,
|
| 807 |
-
0.25,
|
| 808 |
-
0.25,
|
| 809 |
-
0.25
|
| 810 |
-
],
|
| 811 |
-
"test_mae": 2.6223114452299363
|
| 812 |
-
},
|
| 813 |
-
"inverse_mae": {
|
| 814 |
-
"w": [
|
| 815 |
-
0.2431707261347647,
|
| 816 |
-
0.2670867329969705,
|
| 817 |
-
0.36747924632317114,
|
| 818 |
-
0.12226329454509363
|
| 819 |
-
],
|
| 820 |
-
"test_mae": 2.501007095618067
|
| 821 |
-
},
|
| 822 |
-
"constrained_mae": {
|
| 823 |
-
"w": [
|
| 824 |
-
0.0,
|
| 825 |
-
0.0,
|
| 826 |
-
1.0,
|
| 827 |
-
0.0
|
| 828 |
-
],
|
| 829 |
-
"test_mae": 2.3202441940310328
|
| 830 |
-
},
|
| 831 |
-
"constrained_mse": {
|
| 832 |
-
"w": [
|
| 833 |
-
0.12111050197987697,
|
| 834 |
-
1.124100812432969e-15,
|
| 835 |
-
0.8788894980201218,
|
| 836 |
-
0.0
|
| 837 |
-
],
|
| 838 |
-
"test_mae": 2.284742353079749
|
| 839 |
-
}
|
| 840 |
-
},
|
| 841 |
-
"best_individual_on_cal": {
|
| 842 |
-
"model": "arima",
|
| 843 |
-
"test_mae": 2.3202441940310328
|
| 844 |
-
},
|
| 845 |
-
"winner": {
|
| 846 |
-
"method": "constrained_mse",
|
| 847 |
-
"test_mae": 2.284742353079749
|
| 848 |
-
}
|
| 849 |
-
},
|
| 850 |
-
"DEXUSEU_7": {
|
| 851 |
-
"n_cal_folds": 10,
|
| 852 |
-
"n_test_folds": 10,
|
| 853 |
-
"models": [
|
| 854 |
-
"chronos",
|
| 855 |
-
"timesfm",
|
| 856 |
-
"arima",
|
| 857 |
-
"prophet"
|
| 858 |
-
],
|
| 859 |
-
"weights": {
|
| 860 |
-
"equal": {
|
| 861 |
-
"w": [
|
| 862 |
-
0.25,
|
| 863 |
-
0.25,
|
| 864 |
-
0.25,
|
| 865 |
-
0.25
|
| 866 |
-
],
|
| 867 |
-
"test_mae": 0.01777263656328388
|
| 868 |
-
},
|
| 869 |
-
"inverse_mae": {
|
| 870 |
-
"w": [
|
| 871 |
-
0.4380311521257709,
|
| 872 |
-
0.1895078632684431,
|
| 873 |
-
0.2934679866590765,
|
| 874 |
-
0.07899299794670979
|
| 875 |
-
],
|
| 876 |
-
"test_mae": 0.012544562664192396
|
| 877 |
-
},
|
| 878 |
-
"constrained_mae": {
|
| 879 |
-
"w": [
|
| 880 |
-
0.9999999999999984,
|
| 881 |
-
1.0061396160665477e-15,
|
| 882 |
-
5.846018114041837e-16,
|
| 883 |
-
0.0
|
| 884 |
-
],
|
| 885 |
-
"test_mae": 0.008009630047676911
|
| 886 |
-
},
|
| 887 |
-
"constrained_mse": {
|
| 888 |
-
"w": [
|
| 889 |
-
0.88076958835974,
|
| 890 |
-
5.551115123125784e-17,
|
| 891 |
-
0.11923041164026013,
|
| 892 |
-
5.551115123125784e-17
|
| 893 |
-
],
|
| 894 |
-
"test_mae": 0.00812923667806015
|
| 895 |
-
}
|
| 896 |
-
},
|
| 897 |
-
"best_individual_on_cal": {
|
| 898 |
-
"model": "chronos",
|
| 899 |
-
"test_mae": 0.008009630047676897
|
| 900 |
-
},
|
| 901 |
-
"winner": {
|
| 902 |
-
"method": "best_individual",
|
| 903 |
-
"test_mae": 0.008009630047676897
|
| 904 |
-
}
|
| 905 |
-
},
|
| 906 |
-
"DEXUSEU_14": {
|
| 907 |
-
"n_cal_folds": 10,
|
| 908 |
-
"n_test_folds": 10,
|
| 909 |
-
"models": [
|
| 910 |
-
"chronos",
|
| 911 |
-
"timesfm",
|
| 912 |
-
"arima",
|
| 913 |
-
"prophet"
|
| 914 |
-
],
|
| 915 |
-
"weights": {
|
| 916 |
-
"equal": {
|
| 917 |
-
"w": [
|
| 918 |
-
0.25,
|
| 919 |
-
0.25,
|
| 920 |
-
0.25,
|
| 921 |
-
0.25
|
| 922 |
-
],
|
| 923 |
-
"test_mae": 0.01766253143684469
|
| 924 |
-
},
|
| 925 |
-
"inverse_mae": {
|
| 926 |
-
"w": [
|
| 927 |
-
0.3649772970412571,
|
| 928 |
-
0.20972059927142733,
|
| 929 |
-
0.2903737730393877,
|
| 930 |
-
0.13492833064792778
|
| 931 |
-
],
|
| 932 |
-
"test_mae": 0.015437376589926739
|
| 933 |
-
},
|
| 934 |
-
"constrained_mae": {
|
| 935 |
-
"w": [
|
| 936 |
-
0.9999999999999998,
|
| 937 |
-
0.0,
|
| 938 |
-
0.0,
|
| 939 |
-
2.1510571102112403e-16
|
| 940 |
-
],
|
| 941 |
-
"test_mae": 0.01478179445033124
|
| 942 |
-
},
|
| 943 |
-
"constrained_mse": {
|
| 944 |
-
"w": [
|
| 945 |
-
0.5541512994206012,
|
| 946 |
-
1.3877787807814457e-16,
|
| 947 |
-
0.4458487005793988,
|
| 948 |
-
1.0408340855860843e-17
|
| 949 |
-
],
|
| 950 |
-
"test_mae": 0.012606685154728608
|
| 951 |
-
}
|
| 952 |
-
},
|
| 953 |
-
"best_individual_on_cal": {
|
| 954 |
-
"model": "chronos",
|
| 955 |
-
"test_mae": 0.014781794450331237
|
| 956 |
-
},
|
| 957 |
-
"winner": {
|
| 958 |
-
"method": "constrained_mse",
|
| 959 |
-
"test_mae": 0.012606685154728608
|
| 960 |
-
}
|
| 961 |
-
},
|
| 962 |
-
"DEXUSEU_28": {
|
| 963 |
-
"n_cal_folds": 10,
|
| 964 |
-
"n_test_folds": 10,
|
| 965 |
-
"models": [
|
| 966 |
-
"chronos",
|
| 967 |
-
"timesfm",
|
| 968 |
-
"arima",
|
| 969 |
-
"prophet"
|
| 970 |
-
],
|
| 971 |
-
"weights": {
|
| 972 |
-
"equal": {
|
| 973 |
-
"w": [
|
| 974 |
-
0.25,
|
| 975 |
-
0.25,
|
| 976 |
-
0.25,
|
| 977 |
-
0.25
|
| 978 |
-
],
|
| 979 |
-
"test_mae": 0.017842508329409604
|
| 980 |
-
},
|
| 981 |
-
"inverse_mae": {
|
| 982 |
-
"w": [
|
| 983 |
-
0.3562207101529807,
|
| 984 |
-
0.18924080034829216,
|
| 985 |
-
0.31700784157235296,
|
| 986 |
-
0.13753064792637432
|
| 987 |
-
],
|
| 988 |
-
"test_mae": 0.015970560076149547
|
| 989 |
-
},
|
| 990 |
-
"constrained_mae": {
|
| 991 |
-
"w": [
|
| 992 |
-
0.9999999999999982,
|
| 993 |
-
9.43689570931382e-16,
|
| 994 |
-
0.0,
|
| 995 |
-
8.049116928532376e-16
|
| 996 |
-
],
|
| 997 |
-
"test_mae": 0.014453346940792903
|
| 998 |
-
},
|
| 999 |
-
"constrained_mse": {
|
| 1000 |
-
"w": [
|
| 1001 |
-
0.5446169594084305,
|
| 1002 |
-
2.7755575615628907e-17,
|
| 1003 |
-
0.45538304059156953,
|
| 1004 |
-
0.0
|
| 1005 |
-
],
|
| 1006 |
-
"test_mae": 0.013183660449898013
|
| 1007 |
-
}
|
| 1008 |
-
},
|
| 1009 |
-
"best_individual_on_cal": {
|
| 1010 |
-
"model": "chronos",
|
| 1011 |
-
"test_mae": 0.014453346940792889
|
| 1012 |
-
},
|
| 1013 |
-
"winner": {
|
| 1014 |
-
"method": "constrained_mse",
|
| 1015 |
-
"test_mae": 0.013183660449898013
|
| 1016 |
-
}
|
| 1017 |
-
},
|
| 1018 |
-
"DEXCHUS_7": {
|
| 1019 |
-
"n_cal_folds": 10,
|
| 1020 |
-
"n_test_folds": 10,
|
| 1021 |
-
"models": [
|
| 1022 |
-
"chronos",
|
| 1023 |
-
"timesfm",
|
| 1024 |
-
"arima",
|
| 1025 |
-
"prophet"
|
| 1026 |
-
],
|
| 1027 |
-
"weights": {
|
| 1028 |
-
"equal": {
|
| 1029 |
-
"w": [
|
| 1030 |
-
0.25,
|
| 1031 |
-
0.25,
|
| 1032 |
-
0.25,
|
| 1033 |
-
0.25
|
| 1034 |
-
],
|
| 1035 |
-
"test_mae": 0.034690690500036904
|
| 1036 |
-
},
|
| 1037 |
-
"inverse_mae": {
|
| 1038 |
-
"w": [
|
| 1039 |
-
0.30725895677630083,
|
| 1040 |
-
0.24691376598214834,
|
| 1041 |
-
0.3943485789337087,
|
| 1042 |
-
0.05147869830784206
|
| 1043 |
-
],
|
| 1044 |
-
"test_mae": 0.02117886221826054
|
| 1045 |
-
},
|
| 1046 |
-
"constrained_mae": {
|
| 1047 |
-
"w": [
|
| 1048 |
-
0.0,
|
| 1049 |
-
0.0,
|
| 1050 |
-
0.9999999999999998,
|
| 1051 |
-
2.3409280156677643e-16
|
| 1052 |
-
],
|
| 1053 |
-
"test_mae": 0.015762412884263256
|
| 1054 |
-
},
|
| 1055 |
-
"constrained_mse": {
|
| 1056 |
-
"w": [
|
| 1057 |
-
0.0,
|
| 1058 |
-
0.040015823687684034,
|
| 1059 |
-
0.959984176312316,
|
| 1060 |
-
1.0408340855860841e-17
|
| 1061 |
-
],
|
| 1062 |
-
"test_mae": 0.016130545137926368
|
| 1063 |
-
}
|
| 1064 |
-
},
|
| 1065 |
-
"best_individual_on_cal": {
|
| 1066 |
-
"model": "arima",
|
| 1067 |
-
"test_mae": 0.015762412884263242
|
| 1068 |
-
},
|
| 1069 |
-
"winner": {
|
| 1070 |
-
"method": "best_individual",
|
| 1071 |
-
"test_mae": 0.015762412884263242
|
| 1072 |
-
}
|
| 1073 |
-
},
|
| 1074 |
-
"DEXCHUS_14": {
|
| 1075 |
-
"n_cal_folds": 10,
|
| 1076 |
-
"n_test_folds": 10,
|
| 1077 |
-
"models": [
|
| 1078 |
-
"chronos",
|
| 1079 |
-
"timesfm",
|
| 1080 |
-
"arima",
|
| 1081 |
-
"prophet"
|
| 1082 |
-
],
|
| 1083 |
-
"weights": {
|
| 1084 |
-
"equal": {
|
| 1085 |
-
"w": [
|
| 1086 |
-
0.25,
|
| 1087 |
-
0.25,
|
| 1088 |
-
0.25,
|
| 1089 |
-
0.25
|
| 1090 |
-
],
|
| 1091 |
-
"test_mae": 0.049119233033837334
|
| 1092 |
-
},
|
| 1093 |
-
"inverse_mae": {
|
| 1094 |
-
"w": [
|
| 1095 |
-
0.2988178654996703,
|
| 1096 |
-
0.30220512040404324,
|
| 1097 |
-
0.27971237870613896,
|
| 1098 |
-
0.1192646353901474
|
| 1099 |
-
],
|
| 1100 |
-
"test_mae": 0.04197509948228402
|
| 1101 |
-
},
|
| 1102 |
-
"constrained_mae": {
|
| 1103 |
-
"w": [
|
| 1104 |
-
0.0,
|
| 1105 |
-
0.9999999999999992,
|
| 1106 |
-
3.6082248300317563e-16,
|
| 1107 |
-
3.4174052476743075e-16
|
| 1108 |
-
],
|
| 1109 |
-
"test_mae": 0.03187400458960995
|
| 1110 |
-
},
|
| 1111 |
-
"constrained_mse": {
|
| 1112 |
-
"w": [
|
| 1113 |
-
0.5594517657002177,
|
| 1114 |
-
0.23577483341396505,
|
| 1115 |
-
0.2047734008858172,
|
| 1116 |
-
0.0
|
| 1117 |
-
],
|
| 1118 |
-
"test_mae": 0.033564545616950006
|
| 1119 |
-
}
|
| 1120 |
-
},
|
| 1121 |
-
"best_individual_on_cal": {
|
| 1122 |
-
"model": "timesfm",
|
| 1123 |
-
"test_mae": 0.03187400458960993
|
| 1124 |
-
},
|
| 1125 |
-
"winner": {
|
| 1126 |
-
"method": "best_individual",
|
| 1127 |
-
"test_mae": 0.03187400458960993
|
| 1128 |
-
}
|
| 1129 |
-
},
|
| 1130 |
-
"DEXCHUS_28": {
|
| 1131 |
-
"n_cal_folds": 10,
|
| 1132 |
-
"n_test_folds": 10,
|
| 1133 |
-
"models": [
|
| 1134 |
-
"chronos",
|
| 1135 |
-
"timesfm",
|
| 1136 |
-
"arima",
|
| 1137 |
-
"prophet"
|
| 1138 |
-
],
|
| 1139 |
-
"weights": {
|
| 1140 |
-
"equal": {
|
| 1141 |
-
"w": [
|
| 1142 |
-
0.25,
|
| 1143 |
-
0.25,
|
| 1144 |
-
0.25,
|
| 1145 |
-
0.25
|
| 1146 |
-
],
|
| 1147 |
-
"test_mae": 0.07622515708177849
|
| 1148 |
-
},
|
| 1149 |
-
"inverse_mae": {
|
| 1150 |
-
"w": [
|
| 1151 |
-
0.21374276213191848,
|
| 1152 |
-
0.32878921058258087,
|
| 1153 |
-
0.27206545754178274,
|
| 1154 |
-
0.18540256974371785
|
| 1155 |
-
],
|
| 1156 |
-
"test_mae": 0.07368140063745915
|
| 1157 |
-
},
|
| 1158 |
-
"constrained_mae": {
|
| 1159 |
-
"w": [
|
| 1160 |
-
3.565258741241218e-17,
|
| 1161 |
-
0.9999999999999993,
|
| 1162 |
-
0.0,
|
| 1163 |
-
6.714758455242072e-16
|
| 1164 |
-
],
|
| 1165 |
-
"test_mae": 0.05984540049808135
|
| 1166 |
-
},
|
| 1167 |
-
"constrained_mse": {
|
| 1168 |
-
"w": [
|
| 1169 |
-
0.0,
|
| 1170 |
-
0.7615511144034006,
|
| 1171 |
-
0.23844888559659938,
|
| 1172 |
-
5.308685925196128e-17
|
| 1173 |
-
],
|
| 1174 |
-
"test_mae": 0.06440512615984152
|
| 1175 |
-
}
|
| 1176 |
-
},
|
| 1177 |
-
"best_individual_on_cal": {
|
| 1178 |
-
"model": "timesfm",
|
| 1179 |
-
"test_mae": 0.059845400498081305
|
| 1180 |
-
},
|
| 1181 |
-
"winner": {
|
| 1182 |
-
"method": "best_individual",
|
| 1183 |
-
"test_mae": 0.059845400498081305
|
| 1184 |
-
}
|
| 1185 |
-
}
|
| 1186 |
-
},
|
| 1187 |
-
"elapsed_s": 0.09606218338012695
|
| 1188 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"description": "Constrained-stacking comparison. MAE and MSE losses solved on calibration residuals under simplex constraint (w >= 0, sum = 1) via scipy SLSQP. Tested on held-out folds. NOTE: because R3 only stored fold-level aggregates, this analysis synthesizes per-fold MAE draws using the recorded (mean, std) \u2014 directional result only. A full point-level stacking would re-run the forecasters storing per-point predictions, which is scoped for R3 v3.",
|
| 3 |
+
"targets_analyzed": 21,
|
| 4 |
+
"winner_counts": {
|
| 5 |
+
"constrained (MAE or MSE)": 9,
|
| 6 |
+
"equal_weights": 2,
|
| 7 |
+
"best_individual": 10
|
| 8 |
+
},
|
| 9 |
+
"per_target_horizon": {
|
| 10 |
+
"DCOILWTICO_7": {
|
| 11 |
+
"n_cal_folds": 10,
|
| 12 |
+
"n_test_folds": 10,
|
| 13 |
+
"models": [
|
| 14 |
+
"chronos",
|
| 15 |
+
"timesfm",
|
| 16 |
+
"arima",
|
| 17 |
+
"prophet"
|
| 18 |
+
],
|
| 19 |
+
"weights": {
|
| 20 |
+
"equal": {
|
| 21 |
+
"w": [
|
| 22 |
+
0.25,
|
| 23 |
+
0.25,
|
| 24 |
+
0.25,
|
| 25 |
+
0.25
|
| 26 |
+
],
|
| 27 |
+
"test_mae": 4.078327693241436
|
| 28 |
+
},
|
| 29 |
+
"inverse_mae": {
|
| 30 |
+
"w": [
|
| 31 |
+
0.3473502883901263,
|
| 32 |
+
0.2560874881405812,
|
| 33 |
+
0.3115195598071785,
|
| 34 |
+
0.08504266366211403
|
| 35 |
+
],
|
| 36 |
+
"test_mae": 3.3276628679064912
|
| 37 |
+
},
|
| 38 |
+
"constrained_mae": {
|
| 39 |
+
"w": [
|
| 40 |
+
0.9999999999996985,
|
| 41 |
+
1.046385200709126e-13,
|
| 42 |
+
0.0,
|
| 43 |
+
1.9696744235629476e-13
|
| 44 |
+
],
|
| 45 |
+
"test_mae": 2.653996344639796
|
| 46 |
+
},
|
| 47 |
+
"constrained_mse": {
|
| 48 |
+
"w": [
|
| 49 |
+
0.71816178869903,
|
| 50 |
+
6.540164218966743e-14,
|
| 51 |
+
0.2818382113009046,
|
| 52 |
+
0.0
|
| 53 |
+
],
|
| 54 |
+
"test_mae": 2.8532434560990985
|
| 55 |
+
}
|
| 56 |
+
},
|
| 57 |
+
"best_individual_on_cal": {
|
| 58 |
+
"model": "chronos",
|
| 59 |
+
"test_mae": 2.6539963446388284
|
| 60 |
+
},
|
| 61 |
+
"winner": {
|
| 62 |
+
"method": "best_individual",
|
| 63 |
+
"test_mae": 2.6539963446388284
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
"DCOILWTICO_14": {
|
| 67 |
+
"n_cal_folds": 10,
|
| 68 |
+
"n_test_folds": 10,
|
| 69 |
+
"models": [
|
| 70 |
+
"chronos",
|
| 71 |
+
"timesfm",
|
| 72 |
+
"arima",
|
| 73 |
+
"prophet"
|
| 74 |
+
],
|
| 75 |
+
"weights": {
|
| 76 |
+
"equal": {
|
| 77 |
+
"w": [
|
| 78 |
+
0.25,
|
| 79 |
+
0.25,
|
| 80 |
+
0.25,
|
| 81 |
+
0.25
|
| 82 |
+
],
|
| 83 |
+
"test_mae": 5.612792583388805
|
| 84 |
+
},
|
| 85 |
+
"inverse_mae": {
|
| 86 |
+
"w": [
|
| 87 |
+
0.28213323004306484,
|
| 88 |
+
0.22633132223221528,
|
| 89 |
+
0.4020856514147427,
|
| 90 |
+
0.0894497963099773
|
| 91 |
+
],
|
| 92 |
+
"test_mae": 3.9445735906379418
|
| 93 |
+
},
|
| 94 |
+
"constrained_mae": {
|
| 95 |
+
"w": [
|
| 96 |
+
0.0,
|
| 97 |
+
5.025493909904784e-15,
|
| 98 |
+
0.9999999999999949,
|
| 99 |
+
0.0
|
| 100 |
+
],
|
| 101 |
+
"test_mae": 2.606399976137096
|
| 102 |
+
},
|
| 103 |
+
"constrained_mse": {
|
| 104 |
+
"w": [
|
| 105 |
+
0.21952231081723392,
|
| 106 |
+
0.0,
|
| 107 |
+
0.7804776891824843,
|
| 108 |
+
2.8179414894790747e-13
|
| 109 |
+
],
|
| 110 |
+
"test_mae": 2.6333455113190545
|
| 111 |
+
}
|
| 112 |
+
},
|
| 113 |
+
"best_individual_on_cal": {
|
| 114 |
+
"model": "arima",
|
| 115 |
+
"test_mae": 2.6063999761370877
|
| 116 |
+
},
|
| 117 |
+
"winner": {
|
| 118 |
+
"method": "best_individual",
|
| 119 |
+
"test_mae": 2.6063999761370877
|
| 120 |
+
}
|
| 121 |
+
},
|
| 122 |
+
"DCOILWTICO_28": {
|
| 123 |
+
"n_cal_folds": 10,
|
| 124 |
+
"n_test_folds": 10,
|
| 125 |
+
"models": [
|
| 126 |
+
"chronos",
|
| 127 |
+
"timesfm",
|
| 128 |
+
"arima",
|
| 129 |
+
"prophet"
|
| 130 |
+
],
|
| 131 |
+
"weights": {
|
| 132 |
+
"equal": {
|
| 133 |
+
"w": [
|
| 134 |
+
0.25,
|
| 135 |
+
0.25,
|
| 136 |
+
0.25,
|
| 137 |
+
0.25
|
| 138 |
+
],
|
| 139 |
+
"test_mae": 7.224652873063855
|
| 140 |
+
},
|
| 141 |
+
"inverse_mae": {
|
| 142 |
+
"w": [
|
| 143 |
+
0.23850653345434814,
|
| 144 |
+
0.3008301142852576,
|
| 145 |
+
0.32149310365193035,
|
| 146 |
+
0.13917024860846383
|
| 147 |
+
],
|
| 148 |
+
"test_mae": 6.73982107186095
|
| 149 |
+
},
|
| 150 |
+
"constrained_mae": {
|
| 151 |
+
"w": [
|
| 152 |
+
1.4923057986615315e-14,
|
| 153 |
+
0.0,
|
| 154 |
+
0.9999999999999623,
|
| 155 |
+
2.2904834182010197e-14
|
| 156 |
+
],
|
| 157 |
+
"test_mae": 5.30872788303258
|
| 158 |
+
},
|
| 159 |
+
"constrained_mse": {
|
| 160 |
+
"w": [
|
| 161 |
+
0.0,
|
| 162 |
+
0.5605029591213022,
|
| 163 |
+
0.4394970408771834,
|
| 164 |
+
1.5144498461763077e-12
|
| 165 |
+
],
|
| 166 |
+
"test_mae": 6.268328694014642
|
| 167 |
+
}
|
| 168 |
+
},
|
| 169 |
+
"best_individual_on_cal": {
|
| 170 |
+
"model": "arima",
|
| 171 |
+
"test_mae": 5.308727883032449
|
| 172 |
+
},
|
| 173 |
+
"winner": {
|
| 174 |
+
"method": "best_individual",
|
| 175 |
+
"test_mae": 5.308727883032449
|
| 176 |
+
}
|
| 177 |
+
},
|
| 178 |
+
"PCOPPUSDM_7": {
|
| 179 |
+
"n_cal_folds": 3,
|
| 180 |
+
"n_test_folds": 3,
|
| 181 |
+
"models": [
|
| 182 |
+
"chronos",
|
| 183 |
+
"timesfm",
|
| 184 |
+
"arima",
|
| 185 |
+
"prophet"
|
| 186 |
+
],
|
| 187 |
+
"weights": {
|
| 188 |
+
"equal": {
|
| 189 |
+
"w": [
|
| 190 |
+
0.25,
|
| 191 |
+
0.25,
|
| 192 |
+
0.25,
|
| 193 |
+
0.25
|
| 194 |
+
],
|
| 195 |
+
"test_mae": 1490.0940767617776
|
| 196 |
+
},
|
| 197 |
+
"inverse_mae": {
|
| 198 |
+
"w": [
|
| 199 |
+
0.27104333378246154,
|
| 200 |
+
0.17597353969029747,
|
| 201 |
+
0.2509767796737437,
|
| 202 |
+
0.30200634685349736
|
| 203 |
+
],
|
| 204 |
+
"test_mae": 1510.2305023002107
|
| 205 |
+
},
|
| 206 |
+
"constrained_mae": {
|
| 207 |
+
"w": [
|
| 208 |
+
0.0,
|
| 209 |
+
0.0,
|
| 210 |
+
0.0,
|
| 211 |
+
1.0
|
| 212 |
+
],
|
| 213 |
+
"test_mae": 2368.6000030761893
|
| 214 |
+
},
|
| 215 |
+
"constrained_mse": {
|
| 216 |
+
"w": [
|
| 217 |
+
0.25,
|
| 218 |
+
0.25,
|
| 219 |
+
0.25,
|
| 220 |
+
0.25
|
| 221 |
+
],
|
| 222 |
+
"test_mae": 1490.0940767617776
|
| 223 |
+
}
|
| 224 |
+
},
|
| 225 |
+
"best_individual_on_cal": {
|
| 226 |
+
"model": "prophet",
|
| 227 |
+
"test_mae": 2368.6000030761893
|
| 228 |
+
},
|
| 229 |
+
"winner": {
|
| 230 |
+
"method": "equal",
|
| 231 |
+
"test_mae": 1490.0940767617776
|
| 232 |
+
}
|
| 233 |
+
},
|
| 234 |
+
"PCOPPUSDM_14": {
|
| 235 |
+
"n_cal_folds": 3,
|
| 236 |
+
"n_test_folds": 3,
|
| 237 |
+
"models": [
|
| 238 |
+
"chronos",
|
| 239 |
+
"timesfm",
|
| 240 |
+
"arima",
|
| 241 |
+
"prophet"
|
| 242 |
+
],
|
| 243 |
+
"weights": {
|
| 244 |
+
"equal": {
|
| 245 |
+
"w": [
|
| 246 |
+
0.25,
|
| 247 |
+
0.25,
|
| 248 |
+
0.25,
|
| 249 |
+
0.25
|
| 250 |
+
],
|
| 251 |
+
"test_mae": 1322.8195925914633
|
| 252 |
+
},
|
| 253 |
+
"inverse_mae": {
|
| 254 |
+
"w": [
|
| 255 |
+
0.39909529037167984,
|
| 256 |
+
0.15858707123054439,
|
| 257 |
+
0.28187978431797855,
|
| 258 |
+
0.1604378540797973
|
| 259 |
+
],
|
| 260 |
+
"test_mae": 1149.0099023538414
|
| 261 |
+
},
|
| 262 |
+
"constrained_mae": {
|
| 263 |
+
"w": [
|
| 264 |
+
1.0,
|
| 265 |
+
0.0,
|
| 266 |
+
0.0,
|
| 267 |
+
0.0
|
| 268 |
+
],
|
| 269 |
+
"test_mae": 835.4762629006885
|
| 270 |
+
},
|
| 271 |
+
"constrained_mse": {
|
| 272 |
+
"w": [
|
| 273 |
+
0.25,
|
| 274 |
+
0.25,
|
| 275 |
+
0.25,
|
| 276 |
+
0.25
|
| 277 |
+
],
|
| 278 |
+
"test_mae": 1322.8195925914633
|
| 279 |
+
}
|
| 280 |
+
},
|
| 281 |
+
"best_individual_on_cal": {
|
| 282 |
+
"model": "chronos",
|
| 283 |
+
"test_mae": 835.4762629006885
|
| 284 |
+
},
|
| 285 |
+
"winner": {
|
| 286 |
+
"method": "constrained_mae",
|
| 287 |
+
"test_mae": 835.4762629006885
|
| 288 |
+
}
|
| 289 |
+
},
|
| 290 |
+
"PCOPPUSDM_28": {
|
| 291 |
+
"n_cal_folds": 3,
|
| 292 |
+
"n_test_folds": 3,
|
| 293 |
+
"models": [
|
| 294 |
+
"chronos",
|
| 295 |
+
"timesfm",
|
| 296 |
+
"arima",
|
| 297 |
+
"prophet"
|
| 298 |
+
],
|
| 299 |
+
"weights": {
|
| 300 |
+
"equal": {
|
| 301 |
+
"w": [
|
| 302 |
+
0.25,
|
| 303 |
+
0.25,
|
| 304 |
+
0.25,
|
| 305 |
+
0.25
|
| 306 |
+
],
|
| 307 |
+
"test_mae": 968.7983373413057
|
| 308 |
+
},
|
| 309 |
+
"inverse_mae": {
|
| 310 |
+
"w": [
|
| 311 |
+
0.24317295792125612,
|
| 312 |
+
0.28640862860805355,
|
| 313 |
+
0.1904195773780233,
|
| 314 |
+
0.2799988360926669
|
| 315 |
+
],
|
| 316 |
+
"test_mae": 988.2430854488761
|
| 317 |
+
},
|
| 318 |
+
"constrained_mae": {
|
| 319 |
+
"w": [
|
| 320 |
+
0.0,
|
| 321 |
+
1.0,
|
| 322 |
+
0.0,
|
| 323 |
+
0.0
|
| 324 |
+
],
|
| 325 |
+
"test_mae": 1383.8323251118418
|
| 326 |
+
},
|
| 327 |
+
"constrained_mse": {
|
| 328 |
+
"w": [
|
| 329 |
+
0.25,
|
| 330 |
+
0.25,
|
| 331 |
+
0.25,
|
| 332 |
+
0.25
|
| 333 |
+
],
|
| 334 |
+
"test_mae": 968.7983373413057
|
| 335 |
+
}
|
| 336 |
+
},
|
| 337 |
+
"best_individual_on_cal": {
|
| 338 |
+
"model": "timesfm",
|
| 339 |
+
"test_mae": 1383.8323251118418
|
| 340 |
+
},
|
| 341 |
+
"winner": {
|
| 342 |
+
"method": "equal",
|
| 343 |
+
"test_mae": 968.7983373413057
|
| 344 |
+
}
|
| 345 |
+
},
|
| 346 |
+
"DEXTAUS_7": {
|
| 347 |
+
"n_cal_folds": 10,
|
| 348 |
+
"n_test_folds": 10,
|
| 349 |
+
"models": [
|
| 350 |
+
"chronos",
|
| 351 |
+
"timesfm",
|
| 352 |
+
"arima",
|
| 353 |
+
"prophet"
|
| 354 |
+
],
|
| 355 |
+
"weights": {
|
| 356 |
+
"equal": {
|
| 357 |
+
"w": [
|
| 358 |
+
0.25,
|
| 359 |
+
0.25,
|
| 360 |
+
0.25,
|
| 361 |
+
0.25
|
| 362 |
+
],
|
| 363 |
+
"test_mae": 0.2169347525199409
|
| 364 |
+
},
|
| 365 |
+
"inverse_mae": {
|
| 366 |
+
"w": [
|
| 367 |
+
0.34398899758591117,
|
| 368 |
+
0.2030939191106745,
|
| 369 |
+
0.3764283233385005,
|
| 370 |
+
0.07648875996491374
|
| 371 |
+
],
|
| 372 |
+
"test_mae": 0.1658846094174201
|
| 373 |
+
},
|
| 374 |
+
"constrained_mae": {
|
| 375 |
+
"w": [
|
| 376 |
+
0.0,
|
| 377 |
+
7.008282842946293e-16,
|
| 378 |
+
0.9999999999999989,
|
| 379 |
+
4.579669976578766e-16
|
| 380 |
+
],
|
| 381 |
+
"test_mae": 0.12304418839562406
|
| 382 |
+
},
|
| 383 |
+
"constrained_mse": {
|
| 384 |
+
"w": [
|
| 385 |
+
0.3806257863168961,
|
| 386 |
+
8.153200337090993e-17,
|
| 387 |
+
0.619374213683104,
|
| 388 |
+
0.0
|
| 389 |
+
],
|
| 390 |
+
"test_mae": 0.12205338531046768
|
| 391 |
+
}
|
| 392 |
+
},
|
| 393 |
+
"best_individual_on_cal": {
|
| 394 |
+
"model": "arima",
|
| 395 |
+
"test_mae": 0.12304418839562384
|
| 396 |
+
},
|
| 397 |
+
"winner": {
|
| 398 |
+
"method": "constrained_mse",
|
| 399 |
+
"test_mae": 0.12205338531046768
|
| 400 |
+
}
|
| 401 |
+
},
|
| 402 |
+
"DEXTAUS_14": {
|
| 403 |
+
"n_cal_folds": 10,
|
| 404 |
+
"n_test_folds": 10,
|
| 405 |
+
"models": [
|
| 406 |
+
"chronos",
|
| 407 |
+
"timesfm",
|
| 408 |
+
"arima",
|
| 409 |
+
"prophet"
|
| 410 |
+
],
|
| 411 |
+
"weights": {
|
| 412 |
+
"equal": {
|
| 413 |
+
"w": [
|
| 414 |
+
0.25,
|
| 415 |
+
0.25,
|
| 416 |
+
0.25,
|
| 417 |
+
0.25
|
| 418 |
+
],
|
| 419 |
+
"test_mae": 0.2936029051307666
|
| 420 |
+
},
|
| 421 |
+
"inverse_mae": {
|
| 422 |
+
"w": [
|
| 423 |
+
0.3024605314294574,
|
| 424 |
+
0.20677440280922138,
|
| 425 |
+
0.3973126914677932,
|
| 426 |
+
0.09345237429352793
|
| 427 |
+
],
|
| 428 |
+
"test_mae": 0.24062725397849288
|
| 429 |
+
},
|
| 430 |
+
"constrained_mae": {
|
| 431 |
+
"w": [
|
| 432 |
+
0.0,
|
| 433 |
+
0.0,
|
| 434 |
+
1.0,
|
| 435 |
+
0.0
|
| 436 |
+
],
|
| 437 |
+
"test_mae": 0.2075701838535929
|
| 438 |
+
},
|
| 439 |
+
"constrained_mse": {
|
| 440 |
+
"w": [
|
| 441 |
+
0.20409965483488535,
|
| 442 |
+
1.196959198423997e-16,
|
| 443 |
+
0.7959003451651147,
|
| 444 |
+
0.0
|
| 445 |
+
],
|
| 446 |
+
"test_mae": 0.20767726865065442
|
| 447 |
+
}
|
| 448 |
+
},
|
| 449 |
+
"best_individual_on_cal": {
|
| 450 |
+
"model": "arima",
|
| 451 |
+
"test_mae": 0.2075701838535929
|
| 452 |
+
},
|
| 453 |
+
"winner": {
|
| 454 |
+
"method": "constrained_mae",
|
| 455 |
+
"test_mae": 0.2075701838535929
|
| 456 |
+
}
|
| 457 |
+
},
|
| 458 |
+
"DEXTAUS_28": {
|
| 459 |
+
"n_cal_folds": 10,
|
| 460 |
+
"n_test_folds": 10,
|
| 461 |
+
"models": [
|
| 462 |
+
"chronos",
|
| 463 |
+
"timesfm",
|
| 464 |
+
"arima",
|
| 465 |
+
"prophet"
|
| 466 |
+
],
|
| 467 |
+
"weights": {
|
| 468 |
+
"equal": {
|
| 469 |
+
"w": [
|
| 470 |
+
0.25,
|
| 471 |
+
0.25,
|
| 472 |
+
0.25,
|
| 473 |
+
0.25
|
| 474 |
+
],
|
| 475 |
+
"test_mae": 0.35161458970616255
|
| 476 |
+
},
|
| 477 |
+
"inverse_mae": {
|
| 478 |
+
"w": [
|
| 479 |
+
0.31779598685220195,
|
| 480 |
+
0.27176079256586594,
|
| 481 |
+
0.28189025800444834,
|
| 482 |
+
0.12855296257748378
|
| 483 |
+
],
|
| 484 |
+
"test_mae": 0.3189607034469092
|
| 485 |
+
},
|
| 486 |
+
"constrained_mae": {
|
| 487 |
+
"w": [
|
| 488 |
+
0.9999999999999998,
|
| 489 |
+
0.0,
|
| 490 |
+
0.0,
|
| 491 |
+
3.1918911957973246e-16
|
| 492 |
+
],
|
| 493 |
+
"test_mae": 0.289064216740161
|
| 494 |
+
},
|
| 495 |
+
"constrained_mse": {
|
| 496 |
+
"w": [
|
| 497 |
+
0.45663759735298354,
|
| 498 |
+
0.10339949724699603,
|
| 499 |
+
0.4399629054000205,
|
| 500 |
+
0.0
|
| 501 |
+
],
|
| 502 |
+
"test_mae": 0.27882969196380114
|
| 503 |
+
}
|
| 504 |
+
},
|
| 505 |
+
"best_individual_on_cal": {
|
| 506 |
+
"model": "chronos",
|
| 507 |
+
"test_mae": 0.2890642167401609
|
| 508 |
+
},
|
| 509 |
+
"winner": {
|
| 510 |
+
"method": "constrained_mse",
|
| 511 |
+
"test_mae": 0.27882969196380114
|
| 512 |
+
}
|
| 513 |
+
},
|
| 514 |
+
"DEXKOUS_7": {
|
| 515 |
+
"n_cal_folds": 10,
|
| 516 |
+
"n_test_folds": 10,
|
| 517 |
+
"models": [
|
| 518 |
+
"chronos",
|
| 519 |
+
"timesfm",
|
| 520 |
+
"arima",
|
| 521 |
+
"prophet"
|
| 522 |
+
],
|
| 523 |
+
"weights": {
|
| 524 |
+
"equal": {
|
| 525 |
+
"w": [
|
| 526 |
+
0.25,
|
| 527 |
+
0.25,
|
| 528 |
+
0.25,
|
| 529 |
+
0.25
|
| 530 |
+
],
|
| 531 |
+
"test_mae": 17.2493699999521
|
| 532 |
+
},
|
| 533 |
+
"inverse_mae": {
|
| 534 |
+
"w": [
|
| 535 |
+
0.22521754050965248,
|
| 536 |
+
0.2661802247036112,
|
| 537 |
+
0.3761094665932334,
|
| 538 |
+
0.1324927681935029
|
| 539 |
+
],
|
| 540 |
+
"test_mae": 15.47479328474102
|
| 541 |
+
},
|
| 542 |
+
"constrained_mae": {
|
| 543 |
+
"w": [
|
| 544 |
+
0.0,
|
| 545 |
+
2.7089441800853084e-14,
|
| 546 |
+
0.9999999999999729,
|
| 547 |
+
0.0
|
| 548 |
+
],
|
| 549 |
+
"test_mae": 14.0900150189361
|
| 550 |
+
},
|
| 551 |
+
"constrained_mse": {
|
| 552 |
+
"w": [
|
| 553 |
+
1.4068121662922204e-19,
|
| 554 |
+
0.19202529383105713,
|
| 555 |
+
0.8079747057066696,
|
| 556 |
+
4.6227315218243986e-10
|
| 557 |
+
],
|
| 558 |
+
"test_mae": 14.093086604275276
|
| 559 |
+
}
|
| 560 |
+
},
|
| 561 |
+
"best_individual_on_cal": {
|
| 562 |
+
"model": "arima",
|
| 563 |
+
"test_mae": 14.0900150189361
|
| 564 |
+
},
|
| 565 |
+
"winner": {
|
| 566 |
+
"method": "constrained_mae",
|
| 567 |
+
"test_mae": 14.0900150189361
|
| 568 |
+
}
|
| 569 |
+
},
|
| 570 |
+
"DEXKOUS_14": {
|
| 571 |
+
"n_cal_folds": 10,
|
| 572 |
+
"n_test_folds": 10,
|
| 573 |
+
"models": [
|
| 574 |
+
"chronos",
|
| 575 |
+
"timesfm",
|
| 576 |
+
"arima",
|
| 577 |
+
"prophet"
|
| 578 |
+
],
|
| 579 |
+
"weights": {
|
| 580 |
+
"equal": {
|
| 581 |
+
"w": [
|
| 582 |
+
0.25,
|
| 583 |
+
0.25,
|
| 584 |
+
0.25,
|
| 585 |
+
0.25
|
| 586 |
+
],
|
| 587 |
+
"test_mae": 19.357951817590667
|
| 588 |
+
},
|
| 589 |
+
"inverse_mae": {
|
| 590 |
+
"w": [
|
| 591 |
+
0.3500118447028979,
|
| 592 |
+
0.25958141131048756,
|
| 593 |
+
0.2744350765852677,
|
| 594 |
+
0.11597166740134691
|
| 595 |
+
],
|
| 596 |
+
"test_mae": 17.40246559654232
|
| 597 |
+
},
|
| 598 |
+
"constrained_mae": {
|
| 599 |
+
"w": [
|
| 600 |
+
0.9999999999992815,
|
| 601 |
+
3.2990277176712823e-13,
|
| 602 |
+
3.88689080920988e-13,
|
| 603 |
+
0.0
|
| 604 |
+
],
|
| 605 |
+
"test_mae": 13.478487470042296
|
| 606 |
+
},
|
| 607 |
+
"constrained_mse": {
|
| 608 |
+
"w": [
|
| 609 |
+
0.999999999787164,
|
| 610 |
+
0.0,
|
| 611 |
+
0.0,
|
| 612 |
+
2.1283591823683064e-10
|
| 613 |
+
],
|
| 614 |
+
"test_mae": 13.478487473311748
|
| 615 |
+
}
|
| 616 |
+
},
|
| 617 |
+
"best_individual_on_cal": {
|
| 618 |
+
"model": "chronos",
|
| 619 |
+
"test_mae": 13.4784874700395
|
| 620 |
+
},
|
| 621 |
+
"winner": {
|
| 622 |
+
"method": "best_individual",
|
| 623 |
+
"test_mae": 13.4784874700395
|
| 624 |
+
}
|
| 625 |
+
},
|
| 626 |
+
"DEXKOUS_28": {
|
| 627 |
+
"n_cal_folds": 10,
|
| 628 |
+
"n_test_folds": 10,
|
| 629 |
+
"models": [
|
| 630 |
+
"chronos",
|
| 631 |
+
"timesfm",
|
| 632 |
+
"arima",
|
| 633 |
+
"prophet"
|
| 634 |
+
],
|
| 635 |
+
"weights": {
|
| 636 |
+
"equal": {
|
| 637 |
+
"w": [
|
| 638 |
+
0.25,
|
| 639 |
+
0.25,
|
| 640 |
+
0.25,
|
| 641 |
+
0.25
|
| 642 |
+
],
|
| 643 |
+
"test_mae": 24.8683981319863
|
| 644 |
+
},
|
| 645 |
+
"inverse_mae": {
|
| 646 |
+
"w": [
|
| 647 |
+
0.15714338435667446,
|
| 648 |
+
0.3032008336686258,
|
| 649 |
+
0.3174445784155295,
|
| 650 |
+
0.22221120355917026
|
| 651 |
+
],
|
| 652 |
+
"test_mae": 23.767772135429315
|
| 653 |
+
},
|
| 654 |
+
"constrained_mae": {
|
| 655 |
+
"w": [
|
| 656 |
+
0.0,
|
| 657 |
+
0.0,
|
| 658 |
+
1.0,
|
| 659 |
+
0.0
|
| 660 |
+
],
|
| 661 |
+
"test_mae": 13.038534452266783
|
| 662 |
+
},
|
| 663 |
+
"constrained_mse": {
|
| 664 |
+
"w": [
|
| 665 |
+
0.0,
|
| 666 |
+
1.6482941097956984e-10,
|
| 667 |
+
0.9999999997453165,
|
| 668 |
+
8.9854093955618e-11
|
| 669 |
+
],
|
| 670 |
+
"test_mae": 13.038534456323145
|
| 671 |
+
}
|
| 672 |
+
},
|
| 673 |
+
"best_individual_on_cal": {
|
| 674 |
+
"model": "arima",
|
| 675 |
+
"test_mae": 13.038534452266783
|
| 676 |
+
},
|
| 677 |
+
"winner": {
|
| 678 |
+
"method": "constrained_mae",
|
| 679 |
+
"test_mae": 13.038534452266783
|
| 680 |
+
}
|
| 681 |
+
},
|
| 682 |
+
"DEXJPUS_7": {
|
| 683 |
+
"n_cal_folds": 10,
|
| 684 |
+
"n_test_folds": 10,
|
| 685 |
+
"models": [
|
| 686 |
+
"chronos",
|
| 687 |
+
"timesfm",
|
| 688 |
+
"arima",
|
| 689 |
+
"prophet"
|
| 690 |
+
],
|
| 691 |
+
"weights": {
|
| 692 |
+
"equal": {
|
| 693 |
+
"w": [
|
| 694 |
+
0.25,
|
| 695 |
+
0.25,
|
| 696 |
+
0.25,
|
| 697 |
+
0.25
|
| 698 |
+
],
|
| 699 |
+
"test_mae": 2.0058613373406016
|
| 700 |
+
},
|
| 701 |
+
"inverse_mae": {
|
| 702 |
+
"w": [
|
| 703 |
+
0.3311569291093271,
|
| 704 |
+
0.21966516526756977,
|
| 705 |
+
0.27781607384114676,
|
| 706 |
+
0.17136183178195635
|
| 707 |
+
],
|
| 708 |
+
"test_mae": 1.7598609660764388
|
| 709 |
+
},
|
| 710 |
+
"constrained_mae": {
|
| 711 |
+
"w": [
|
| 712 |
+
0.9999999999999993,
|
| 713 |
+
0.0,
|
| 714 |
+
0.0,
|
| 715 |
+
7.14706072102444e-16
|
| 716 |
+
],
|
| 717 |
+
"test_mae": 0.9624409634715991
|
| 718 |
+
},
|
| 719 |
+
"constrained_mse": {
|
| 720 |
+
"w": [
|
| 721 |
+
0.637656517780962,
|
| 722 |
+
0.0,
|
| 723 |
+
0.36234348221903795,
|
| 724 |
+
2.0816681711721676e-17
|
| 725 |
+
],
|
| 726 |
+
"test_mae": 1.1158006833860175
|
| 727 |
+
}
|
| 728 |
+
},
|
| 729 |
+
"best_individual_on_cal": {
|
| 730 |
+
"model": "chronos",
|
| 731 |
+
"test_mae": 0.962440963471597
|
| 732 |
+
},
|
| 733 |
+
"winner": {
|
| 734 |
+
"method": "best_individual",
|
| 735 |
+
"test_mae": 0.962440963471597
|
| 736 |
+
}
|
| 737 |
+
},
|
| 738 |
+
"DEXJPUS_14": {
|
| 739 |
+
"n_cal_folds": 10,
|
| 740 |
+
"n_test_folds": 10,
|
| 741 |
+
"models": [
|
| 742 |
+
"chronos",
|
| 743 |
+
"timesfm",
|
| 744 |
+
"arima",
|
| 745 |
+
"prophet"
|
| 746 |
+
],
|
| 747 |
+
"weights": {
|
| 748 |
+
"equal": {
|
| 749 |
+
"w": [
|
| 750 |
+
0.25,
|
| 751 |
+
0.25,
|
| 752 |
+
0.25,
|
| 753 |
+
0.25
|
| 754 |
+
],
|
| 755 |
+
"test_mae": 2.0585639763398134
|
| 756 |
+
},
|
| 757 |
+
"inverse_mae": {
|
| 758 |
+
"w": [
|
| 759 |
+
0.29221948346213755,
|
| 760 |
+
0.30006908767689383,
|
| 761 |
+
0.3336814964148649,
|
| 762 |
+
0.07402993244610366
|
| 763 |
+
],
|
| 764 |
+
"test_mae": 1.525371337574877
|
| 765 |
+
},
|
| 766 |
+
"constrained_mae": {
|
| 767 |
+
"w": [
|
| 768 |
+
0.0,
|
| 769 |
+
0.0,
|
| 770 |
+
0.9999999999998224,
|
| 771 |
+
1.7753796613177788e-13
|
| 772 |
+
],
|
| 773 |
+
"test_mae": 0.9391751508495592
|
| 774 |
+
},
|
| 775 |
+
"constrained_mse": {
|
| 776 |
+
"w": [
|
| 777 |
+
0.0,
|
| 778 |
+
0.23909961575984545,
|
| 779 |
+
0.7609003842401545,
|
| 780 |
+
0.0
|
| 781 |
+
],
|
| 782 |
+
"test_mae": 1.1619170740566178
|
| 783 |
+
}
|
| 784 |
+
},
|
| 785 |
+
"best_individual_on_cal": {
|
| 786 |
+
"model": "arima",
|
| 787 |
+
"test_mae": 0.9391751508489655
|
| 788 |
+
},
|
| 789 |
+
"winner": {
|
| 790 |
+
"method": "best_individual",
|
| 791 |
+
"test_mae": 0.9391751508489655
|
| 792 |
+
}
|
| 793 |
+
},
|
| 794 |
+
"DEXJPUS_28": {
|
| 795 |
+
"n_cal_folds": 10,
|
| 796 |
+
"n_test_folds": 10,
|
| 797 |
+
"models": [
|
| 798 |
+
"chronos",
|
| 799 |
+
"timesfm",
|
| 800 |
+
"arima",
|
| 801 |
+
"prophet"
|
| 802 |
+
],
|
| 803 |
+
"weights": {
|
| 804 |
+
"equal": {
|
| 805 |
+
"w": [
|
| 806 |
+
0.25,
|
| 807 |
+
0.25,
|
| 808 |
+
0.25,
|
| 809 |
+
0.25
|
| 810 |
+
],
|
| 811 |
+
"test_mae": 2.6223114452299363
|
| 812 |
+
},
|
| 813 |
+
"inverse_mae": {
|
| 814 |
+
"w": [
|
| 815 |
+
0.2431707261347647,
|
| 816 |
+
0.2670867329969705,
|
| 817 |
+
0.36747924632317114,
|
| 818 |
+
0.12226329454509363
|
| 819 |
+
],
|
| 820 |
+
"test_mae": 2.501007095618067
|
| 821 |
+
},
|
| 822 |
+
"constrained_mae": {
|
| 823 |
+
"w": [
|
| 824 |
+
0.0,
|
| 825 |
+
0.0,
|
| 826 |
+
1.0,
|
| 827 |
+
0.0
|
| 828 |
+
],
|
| 829 |
+
"test_mae": 2.3202441940310328
|
| 830 |
+
},
|
| 831 |
+
"constrained_mse": {
|
| 832 |
+
"w": [
|
| 833 |
+
0.12111050197987697,
|
| 834 |
+
1.124100812432969e-15,
|
| 835 |
+
0.8788894980201218,
|
| 836 |
+
0.0
|
| 837 |
+
],
|
| 838 |
+
"test_mae": 2.284742353079749
|
| 839 |
+
}
|
| 840 |
+
},
|
| 841 |
+
"best_individual_on_cal": {
|
| 842 |
+
"model": "arima",
|
| 843 |
+
"test_mae": 2.3202441940310328
|
| 844 |
+
},
|
| 845 |
+
"winner": {
|
| 846 |
+
"method": "constrained_mse",
|
| 847 |
+
"test_mae": 2.284742353079749
|
| 848 |
+
}
|
| 849 |
+
},
|
| 850 |
+
"DEXUSEU_7": {
|
| 851 |
+
"n_cal_folds": 10,
|
| 852 |
+
"n_test_folds": 10,
|
| 853 |
+
"models": [
|
| 854 |
+
"chronos",
|
| 855 |
+
"timesfm",
|
| 856 |
+
"arima",
|
| 857 |
+
"prophet"
|
| 858 |
+
],
|
| 859 |
+
"weights": {
|
| 860 |
+
"equal": {
|
| 861 |
+
"w": [
|
| 862 |
+
0.25,
|
| 863 |
+
0.25,
|
| 864 |
+
0.25,
|
| 865 |
+
0.25
|
| 866 |
+
],
|
| 867 |
+
"test_mae": 0.01777263656328388
|
| 868 |
+
},
|
| 869 |
+
"inverse_mae": {
|
| 870 |
+
"w": [
|
| 871 |
+
0.4380311521257709,
|
| 872 |
+
0.1895078632684431,
|
| 873 |
+
0.2934679866590765,
|
| 874 |
+
0.07899299794670979
|
| 875 |
+
],
|
| 876 |
+
"test_mae": 0.012544562664192396
|
| 877 |
+
},
|
| 878 |
+
"constrained_mae": {
|
| 879 |
+
"w": [
|
| 880 |
+
0.9999999999999984,
|
| 881 |
+
1.0061396160665477e-15,
|
| 882 |
+
5.846018114041837e-16,
|
| 883 |
+
0.0
|
| 884 |
+
],
|
| 885 |
+
"test_mae": 0.008009630047676911
|
| 886 |
+
},
|
| 887 |
+
"constrained_mse": {
|
| 888 |
+
"w": [
|
| 889 |
+
0.88076958835974,
|
| 890 |
+
5.551115123125784e-17,
|
| 891 |
+
0.11923041164026013,
|
| 892 |
+
5.551115123125784e-17
|
| 893 |
+
],
|
| 894 |
+
"test_mae": 0.00812923667806015
|
| 895 |
+
}
|
| 896 |
+
},
|
| 897 |
+
"best_individual_on_cal": {
|
| 898 |
+
"model": "chronos",
|
| 899 |
+
"test_mae": 0.008009630047676897
|
| 900 |
+
},
|
| 901 |
+
"winner": {
|
| 902 |
+
"method": "best_individual",
|
| 903 |
+
"test_mae": 0.008009630047676897
|
| 904 |
+
}
|
| 905 |
+
},
|
| 906 |
+
"DEXUSEU_14": {
|
| 907 |
+
"n_cal_folds": 10,
|
| 908 |
+
"n_test_folds": 10,
|
| 909 |
+
"models": [
|
| 910 |
+
"chronos",
|
| 911 |
+
"timesfm",
|
| 912 |
+
"arima",
|
| 913 |
+
"prophet"
|
| 914 |
+
],
|
| 915 |
+
"weights": {
|
| 916 |
+
"equal": {
|
| 917 |
+
"w": [
|
| 918 |
+
0.25,
|
| 919 |
+
0.25,
|
| 920 |
+
0.25,
|
| 921 |
+
0.25
|
| 922 |
+
],
|
| 923 |
+
"test_mae": 0.01766253143684469
|
| 924 |
+
},
|
| 925 |
+
"inverse_mae": {
|
| 926 |
+
"w": [
|
| 927 |
+
0.3649772970412571,
|
| 928 |
+
0.20972059927142733,
|
| 929 |
+
0.2903737730393877,
|
| 930 |
+
0.13492833064792778
|
| 931 |
+
],
|
| 932 |
+
"test_mae": 0.015437376589926739
|
| 933 |
+
},
|
| 934 |
+
"constrained_mae": {
|
| 935 |
+
"w": [
|
| 936 |
+
0.9999999999999998,
|
| 937 |
+
0.0,
|
| 938 |
+
0.0,
|
| 939 |
+
2.1510571102112403e-16
|
| 940 |
+
],
|
| 941 |
+
"test_mae": 0.01478179445033124
|
| 942 |
+
},
|
| 943 |
+
"constrained_mse": {
|
| 944 |
+
"w": [
|
| 945 |
+
0.5541512994206012,
|
| 946 |
+
1.3877787807814457e-16,
|
| 947 |
+
0.4458487005793988,
|
| 948 |
+
1.0408340855860843e-17
|
| 949 |
+
],
|
| 950 |
+
"test_mae": 0.012606685154728608
|
| 951 |
+
}
|
| 952 |
+
},
|
| 953 |
+
"best_individual_on_cal": {
|
| 954 |
+
"model": "chronos",
|
| 955 |
+
"test_mae": 0.014781794450331237
|
| 956 |
+
},
|
| 957 |
+
"winner": {
|
| 958 |
+
"method": "constrained_mse",
|
| 959 |
+
"test_mae": 0.012606685154728608
|
| 960 |
+
}
|
| 961 |
+
},
|
| 962 |
+
"DEXUSEU_28": {
|
| 963 |
+
"n_cal_folds": 10,
|
| 964 |
+
"n_test_folds": 10,
|
| 965 |
+
"models": [
|
| 966 |
+
"chronos",
|
| 967 |
+
"timesfm",
|
| 968 |
+
"arima",
|
| 969 |
+
"prophet"
|
| 970 |
+
],
|
| 971 |
+
"weights": {
|
| 972 |
+
"equal": {
|
| 973 |
+
"w": [
|
| 974 |
+
0.25,
|
| 975 |
+
0.25,
|
| 976 |
+
0.25,
|
| 977 |
+
0.25
|
| 978 |
+
],
|
| 979 |
+
"test_mae": 0.017842508329409604
|
| 980 |
+
},
|
| 981 |
+
"inverse_mae": {
|
| 982 |
+
"w": [
|
| 983 |
+
0.3562207101529807,
|
| 984 |
+
0.18924080034829216,
|
| 985 |
+
0.31700784157235296,
|
| 986 |
+
0.13753064792637432
|
| 987 |
+
],
|
| 988 |
+
"test_mae": 0.015970560076149547
|
| 989 |
+
},
|
| 990 |
+
"constrained_mae": {
|
| 991 |
+
"w": [
|
| 992 |
+
0.9999999999999982,
|
| 993 |
+
9.43689570931382e-16,
|
| 994 |
+
0.0,
|
| 995 |
+
8.049116928532376e-16
|
| 996 |
+
],
|
| 997 |
+
"test_mae": 0.014453346940792903
|
| 998 |
+
},
|
| 999 |
+
"constrained_mse": {
|
| 1000 |
+
"w": [
|
| 1001 |
+
0.5446169594084305,
|
| 1002 |
+
2.7755575615628907e-17,
|
| 1003 |
+
0.45538304059156953,
|
| 1004 |
+
0.0
|
| 1005 |
+
],
|
| 1006 |
+
"test_mae": 0.013183660449898013
|
| 1007 |
+
}
|
| 1008 |
+
},
|
| 1009 |
+
"best_individual_on_cal": {
|
| 1010 |
+
"model": "chronos",
|
| 1011 |
+
"test_mae": 0.014453346940792889
|
| 1012 |
+
},
|
| 1013 |
+
"winner": {
|
| 1014 |
+
"method": "constrained_mse",
|
| 1015 |
+
"test_mae": 0.013183660449898013
|
| 1016 |
+
}
|
| 1017 |
+
},
|
| 1018 |
+
"DEXCHUS_7": {
|
| 1019 |
+
"n_cal_folds": 10,
|
| 1020 |
+
"n_test_folds": 10,
|
| 1021 |
+
"models": [
|
| 1022 |
+
"chronos",
|
| 1023 |
+
"timesfm",
|
| 1024 |
+
"arima",
|
| 1025 |
+
"prophet"
|
| 1026 |
+
],
|
| 1027 |
+
"weights": {
|
| 1028 |
+
"equal": {
|
| 1029 |
+
"w": [
|
| 1030 |
+
0.25,
|
| 1031 |
+
0.25,
|
| 1032 |
+
0.25,
|
| 1033 |
+
0.25
|
| 1034 |
+
],
|
| 1035 |
+
"test_mae": 0.034690690500036904
|
| 1036 |
+
},
|
| 1037 |
+
"inverse_mae": {
|
| 1038 |
+
"w": [
|
| 1039 |
+
0.30725895677630083,
|
| 1040 |
+
0.24691376598214834,
|
| 1041 |
+
0.3943485789337087,
|
| 1042 |
+
0.05147869830784206
|
| 1043 |
+
],
|
| 1044 |
+
"test_mae": 0.02117886221826054
|
| 1045 |
+
},
|
| 1046 |
+
"constrained_mae": {
|
| 1047 |
+
"w": [
|
| 1048 |
+
0.0,
|
| 1049 |
+
0.0,
|
| 1050 |
+
0.9999999999999998,
|
| 1051 |
+
2.3409280156677643e-16
|
| 1052 |
+
],
|
| 1053 |
+
"test_mae": 0.015762412884263256
|
| 1054 |
+
},
|
| 1055 |
+
"constrained_mse": {
|
| 1056 |
+
"w": [
|
| 1057 |
+
0.0,
|
| 1058 |
+
0.040015823687684034,
|
| 1059 |
+
0.959984176312316,
|
| 1060 |
+
1.0408340855860841e-17
|
| 1061 |
+
],
|
| 1062 |
+
"test_mae": 0.016130545137926368
|
| 1063 |
+
}
|
| 1064 |
+
},
|
| 1065 |
+
"best_individual_on_cal": {
|
| 1066 |
+
"model": "arima",
|
| 1067 |
+
"test_mae": 0.015762412884263242
|
| 1068 |
+
},
|
| 1069 |
+
"winner": {
|
| 1070 |
+
"method": "best_individual",
|
| 1071 |
+
"test_mae": 0.015762412884263242
|
| 1072 |
+
}
|
| 1073 |
+
},
|
| 1074 |
+
"DEXCHUS_14": {
|
| 1075 |
+
"n_cal_folds": 10,
|
| 1076 |
+
"n_test_folds": 10,
|
| 1077 |
+
"models": [
|
| 1078 |
+
"chronos",
|
| 1079 |
+
"timesfm",
|
| 1080 |
+
"arima",
|
| 1081 |
+
"prophet"
|
| 1082 |
+
],
|
| 1083 |
+
"weights": {
|
| 1084 |
+
"equal": {
|
| 1085 |
+
"w": [
|
| 1086 |
+
0.25,
|
| 1087 |
+
0.25,
|
| 1088 |
+
0.25,
|
| 1089 |
+
0.25
|
| 1090 |
+
],
|
| 1091 |
+
"test_mae": 0.049119233033837334
|
| 1092 |
+
},
|
| 1093 |
+
"inverse_mae": {
|
| 1094 |
+
"w": [
|
| 1095 |
+
0.2988178654996703,
|
| 1096 |
+
0.30220512040404324,
|
| 1097 |
+
0.27971237870613896,
|
| 1098 |
+
0.1192646353901474
|
| 1099 |
+
],
|
| 1100 |
+
"test_mae": 0.04197509948228402
|
| 1101 |
+
},
|
| 1102 |
+
"constrained_mae": {
|
| 1103 |
+
"w": [
|
| 1104 |
+
0.0,
|
| 1105 |
+
0.9999999999999992,
|
| 1106 |
+
3.6082248300317563e-16,
|
| 1107 |
+
3.4174052476743075e-16
|
| 1108 |
+
],
|
| 1109 |
+
"test_mae": 0.03187400458960995
|
| 1110 |
+
},
|
| 1111 |
+
"constrained_mse": {
|
| 1112 |
+
"w": [
|
| 1113 |
+
0.5594517657002177,
|
| 1114 |
+
0.23577483341396505,
|
| 1115 |
+
0.2047734008858172,
|
| 1116 |
+
0.0
|
| 1117 |
+
],
|
| 1118 |
+
"test_mae": 0.033564545616950006
|
| 1119 |
+
}
|
| 1120 |
+
},
|
| 1121 |
+
"best_individual_on_cal": {
|
| 1122 |
+
"model": "timesfm",
|
| 1123 |
+
"test_mae": 0.03187400458960993
|
| 1124 |
+
},
|
| 1125 |
+
"winner": {
|
| 1126 |
+
"method": "best_individual",
|
| 1127 |
+
"test_mae": 0.03187400458960993
|
| 1128 |
+
}
|
| 1129 |
+
},
|
| 1130 |
+
"DEXCHUS_28": {
|
| 1131 |
+
"n_cal_folds": 10,
|
| 1132 |
+
"n_test_folds": 10,
|
| 1133 |
+
"models": [
|
| 1134 |
+
"chronos",
|
| 1135 |
+
"timesfm",
|
| 1136 |
+
"arima",
|
| 1137 |
+
"prophet"
|
| 1138 |
+
],
|
| 1139 |
+
"weights": {
|
| 1140 |
+
"equal": {
|
| 1141 |
+
"w": [
|
| 1142 |
+
0.25,
|
| 1143 |
+
0.25,
|
| 1144 |
+
0.25,
|
| 1145 |
+
0.25
|
| 1146 |
+
],
|
| 1147 |
+
"test_mae": 0.07622515708177849
|
| 1148 |
+
},
|
| 1149 |
+
"inverse_mae": {
|
| 1150 |
+
"w": [
|
| 1151 |
+
0.21374276213191848,
|
| 1152 |
+
0.32878921058258087,
|
| 1153 |
+
0.27206545754178274,
|
| 1154 |
+
0.18540256974371785
|
| 1155 |
+
],
|
| 1156 |
+
"test_mae": 0.07368140063745915
|
| 1157 |
+
},
|
| 1158 |
+
"constrained_mae": {
|
| 1159 |
+
"w": [
|
| 1160 |
+
3.565258741241218e-17,
|
| 1161 |
+
0.9999999999999993,
|
| 1162 |
+
0.0,
|
| 1163 |
+
6.714758455242072e-16
|
| 1164 |
+
],
|
| 1165 |
+
"test_mae": 0.05984540049808135
|
| 1166 |
+
},
|
| 1167 |
+
"constrained_mse": {
|
| 1168 |
+
"w": [
|
| 1169 |
+
0.0,
|
| 1170 |
+
0.7615511144034006,
|
| 1171 |
+
0.23844888559659938,
|
| 1172 |
+
5.308685925196128e-17
|
| 1173 |
+
],
|
| 1174 |
+
"test_mae": 0.06440512615984152
|
| 1175 |
+
}
|
| 1176 |
+
},
|
| 1177 |
+
"best_individual_on_cal": {
|
| 1178 |
+
"model": "timesfm",
|
| 1179 |
+
"test_mae": 0.059845400498081305
|
| 1180 |
+
},
|
| 1181 |
+
"winner": {
|
| 1182 |
+
"method": "best_individual",
|
| 1183 |
+
"test_mae": 0.059845400498081305
|
| 1184 |
+
}
|
| 1185 |
+
}
|
| 1186 |
+
},
|
| 1187 |
+
"elapsed_s": 0.09606218338012695
|
| 1188 |
}
|
FINAL_SUBMIT/receipts/R3_STACKING_V3_POINTLEVEL.json
CHANGED
|
@@ -1,227 +1,227 @@
|
|
| 1 |
-
{
|
| 2 |
-
"description": "Per-point Bates-Granger constrained stacking on real forecaster outputs. No synthesized folds.",
|
| 3 |
-
"per_target_horizon": {
|
| 4 |
-
"DCOILWTICO_h7": {
|
| 5 |
-
"n_cal_points": 70,
|
| 6 |
-
"n_test_points": 70,
|
| 7 |
-
"individual_mae": {
|
| 8 |
-
"chronos": 3.006047764369419,
|
| 9 |
-
"arima": 3.0841361525087674,
|
| 10 |
-
"prophet": 8.557134422551027,
|
| 11 |
-
"naive": 2.839285714285714
|
| 12 |
-
},
|
| 13 |
-
"stacking_mae": {
|
| 14 |
-
"equal": 3.381860717562512,
|
| 15 |
-
"best_on_cal": 2.839285714285714,
|
| 16 |
-
"constrained_mae": 2.839285714285714,
|
| 17 |
-
"constrained_mse": 2.839285714285714
|
| 18 |
-
},
|
| 19 |
-
"weights": {
|
| 20 |
-
"constrained_mae": {
|
| 21 |
-
"chronos": 0.0,
|
| 22 |
-
"arima": 3.8857805861880464e-16,
|
| 23 |
-
"prophet": 0.0,
|
| 24 |
-
"naive": 0.9999999999999996
|
| 25 |
-
},
|
| 26 |
-
"constrained_mse": {
|
| 27 |
-
"chronos": 1.2281842209915794e-15,
|
| 28 |
-
"arima": 1.7069679003611782e-15,
|
| 29 |
-
"prophet": 6.824272182231614e-17,
|
| 30 |
-
"naive": 0.999999999999997
|
| 31 |
-
}
|
| 32 |
-
},
|
| 33 |
-
"best_single_model": "naive",
|
| 34 |
-
"best_single_mae": 2.839285714285714,
|
| 35 |
-
"winner_method": "naive",
|
| 36 |
-
"winner_mae": 2.839285714285714,
|
| 37 |
-
"constrained_beats_best_single": false
|
| 38 |
-
},
|
| 39 |
-
"DCOILWTICO_h14": {
|
| 40 |
-
"n_cal_points": 140,
|
| 41 |
-
"n_test_points": 140,
|
| 42 |
-
"individual_mae": {
|
| 43 |
-
"chronos": 3.797937408447266,
|
| 44 |
-
"arima": 3.917782537843266,
|
| 45 |
-
"prophet": 9.218187229009528,
|
| 46 |
-
"naive": 3.6239285714285714
|
| 47 |
-
},
|
| 48 |
-
"stacking_mae": {
|
| 49 |
-
"equal": 3.9604401984158755,
|
| 50 |
-
"best_on_cal": 3.6239285714285714,
|
| 51 |
-
"constrained_mae": 3.623928571428571,
|
| 52 |
-
"constrained_mse": 3.6994484688718305
|
| 53 |
-
},
|
| 54 |
-
"weights": {
|
| 55 |
-
"constrained_mae": {
|
| 56 |
-
"chronos": 1.3877787807814454e-16,
|
| 57 |
-
"arima": 0.0,
|
| 58 |
-
"prophet": 0.0,
|
| 59 |
-
"naive": 0.9999999999999998
|
| 60 |
-
},
|
| 61 |
-
"constrained_mse": {
|
| 62 |
-
"chronos": 3.0753177782116836e-14,
|
| 63 |
-
"arima": 0.25973397692659406,
|
| 64 |
-
"prophet": 1.0636618946679322e-15,
|
| 65 |
-
"naive": 0.7402660230733741
|
| 66 |
-
}
|
| 67 |
-
},
|
| 68 |
-
"best_single_model": "naive",
|
| 69 |
-
"best_single_mae": 3.6239285714285714,
|
| 70 |
-
"winner_method": "constrained_mae",
|
| 71 |
-
"winner_mae": 3.623928571428571,
|
| 72 |
-
"constrained_beats_best_single": true
|
| 73 |
-
},
|
| 74 |
-
"DEXUSEU_h7": {
|
| 75 |
-
"n_cal_points": 70,
|
| 76 |
-
"n_test_points": 70,
|
| 77 |
-
"individual_mae": {
|
| 78 |
-
"chronos": 0.00997808286394391,
|
| 79 |
-
"arima": 0.00909829887487626,
|
| 80 |
-
"prophet": 0.04588529230089117,
|
| 81 |
-
"naive": 0.009057142857142856
|
| 82 |
-
},
|
| 83 |
-
"stacking_mae": {
|
| 84 |
-
"equal": 0.013885443002327432,
|
| 85 |
-
"best_on_cal": 0.00997808286394391,
|
| 86 |
-
"constrained_mae": 0.009495985176023706,
|
| 87 |
-
"constrained_mse": 0.013885443002327432
|
| 88 |
-
},
|
| 89 |
-
"weights": {
|
| 90 |
-
"constrained_mae": {
|
| 91 |
-
"chronos": 0.3382904222928093,
|
| 92 |
-
"arima": 0.2908333034179931,
|
| 93 |
-
"prophet": 0.07824807605162067,
|
| 94 |
-
"naive": 0.292628198237577
|
| 95 |
-
},
|
| 96 |
-
"constrained_mse": {
|
| 97 |
-
"chronos": 0.25,
|
| 98 |
-
"arima": 0.25,
|
| 99 |
-
"prophet": 0.25,
|
| 100 |
-
"naive": 0.25
|
| 101 |
-
}
|
| 102 |
-
},
|
| 103 |
-
"best_single_model": "naive",
|
| 104 |
-
"best_single_mae": 0.009057142857142856,
|
| 105 |
-
"winner_method": "naive",
|
| 106 |
-
"winner_mae": 0.009057142857142856,
|
| 107 |
-
"constrained_beats_best_single": false
|
| 108 |
-
},
|
| 109 |
-
"DEXUSEU_h14": {
|
| 110 |
-
"n_cal_points": 140,
|
| 111 |
-
"n_test_points": 140,
|
| 112 |
-
"individual_mae": {
|
| 113 |
-
"chronos": 0.013727861084256852,
|
| 114 |
-
"arima": 0.012013652348349491,
|
| 115 |
-
"prophet": 0.04736957874192551,
|
| 116 |
-
"naive": 0.01203071428571428
|
| 117 |
-
},
|
| 118 |
-
"stacking_mae": {
|
| 119 |
-
"equal": 0.015656730784239885,
|
| 120 |
-
"best_on_cal": 0.012013652348349491,
|
| 121 |
-
"constrained_mae": 0.012635021721737227,
|
| 122 |
-
"constrained_mse": 0.015656730784239885
|
| 123 |
-
},
|
| 124 |
-
"weights": {
|
| 125 |
-
"constrained_mae": {
|
| 126 |
-
"chronos": 0.3173041077741453,
|
| 127 |
-
"arima": 0.2850093471133051,
|
| 128 |
-
"prophet": 0.10822240332468126,
|
| 129 |
-
"naive": 0.28946414178786833
|
| 130 |
-
},
|
| 131 |
-
"constrained_mse": {
|
| 132 |
-
"chronos": 0.25,
|
| 133 |
-
"arima": 0.25,
|
| 134 |
-
"prophet": 0.25,
|
| 135 |
-
"naive": 0.25
|
| 136 |
-
}
|
| 137 |
-
},
|
| 138 |
-
"best_single_model": "arima",
|
| 139 |
-
"best_single_mae": 0.012013652348349491,
|
| 140 |
-
"winner_method": "arima",
|
| 141 |
-
"winner_mae": 0.012013652348349491,
|
| 142 |
-
"constrained_beats_best_single": false
|
| 143 |
-
},
|
| 144 |
-
"DEXCHUS_h7": {
|
| 145 |
-
"n_cal_points": 70,
|
| 146 |
-
"n_test_points": 70,
|
| 147 |
-
"individual_mae": {
|
| 148 |
-
"chronos": 0.019519044701712434,
|
| 149 |
-
"arima": 0.017992622791365688,
|
| 150 |
-
"prophet": 0.11663701396527856,
|
| 151 |
-
"naive": 0.01873000000000015
|
| 152 |
-
},
|
| 153 |
-
"stacking_mae": {
|
| 154 |
-
"equal": 0.03595753473515902,
|
| 155 |
-
"best_on_cal": 0.019519044701712434,
|
| 156 |
-
"constrained_mae": 0.020133491932037322,
|
| 157 |
-
"constrained_mse": 0.019334668170698382
|
| 158 |
-
},
|
| 159 |
-
"weights": {
|
| 160 |
-
"constrained_mae": {
|
| 161 |
-
"chronos": 0.7133898921965662,
|
| 162 |
-
"arima": 0.21870528495965705,
|
| 163 |
-
"prophet": 0.06790482284377684,
|
| 164 |
-
"naive": 0.0
|
| 165 |
-
},
|
| 166 |
-
"constrained_mse": {
|
| 167 |
-
"chronos": 0.935153684195057,
|
| 168 |
-
"arima": 8.998878031629688e-18,
|
| 169 |
-
"prophet": 0.008348340456592942,
|
| 170 |
-
"naive": 0.056497975348350146
|
| 171 |
-
}
|
| 172 |
-
},
|
| 173 |
-
"best_single_model": "arima",
|
| 174 |
-
"best_single_mae": 0.017992622791365688,
|
| 175 |
-
"winner_method": "arima",
|
| 176 |
-
"winner_mae": 0.017992622791365688,
|
| 177 |
-
"constrained_beats_best_single": false
|
| 178 |
-
},
|
| 179 |
-
"DEXCHUS_h14": {
|
| 180 |
-
"n_cal_points": 140,
|
| 181 |
-
"n_test_points": 140,
|
| 182 |
-
"individual_mae": {
|
| 183 |
-
"chronos": 0.03237065534319195,
|
| 184 |
-
"arima": 0.03236972869761379,
|
| 185 |
-
"prophet": 0.12129274215959333,
|
| 186 |
-
"naive": 0.03212142857142869
|
| 187 |
-
},
|
| 188 |
-
"stacking_mae": {
|
| 189 |
-
"equal": 0.043605583896191145,
|
| 190 |
-
"best_on_cal": 0.03237065534319195,
|
| 191 |
-
"constrained_mae": 0.031424293689945516,
|
| 192 |
-
"constrained_mse": 0.034848071305054344
|
| 193 |
-
},
|
| 194 |
-
"weights": {
|
| 195 |
-
"constrained_mae": {
|
| 196 |
-
"chronos": 0.6699556648170705,
|
| 197 |
-
"arima": 0.251108263144011,
|
| 198 |
-
"prophet": 0.07893607203891846,
|
| 199 |
-
"naive": 6.03983418880819e-19
|
| 200 |
-
},
|
| 201 |
-
"constrained_mse": {
|
| 202 |
-
"chronos": 0.8500735106653095,
|
| 203 |
-
"arima": 0.0,
|
| 204 |
-
"prophet": 0.14992648933469047,
|
| 205 |
-
"naive": 0.0
|
| 206 |
-
}
|
| 207 |
-
},
|
| 208 |
-
"best_single_model": "naive",
|
| 209 |
-
"best_single_mae": 0.03212142857142869,
|
| 210 |
-
"winner_method": "constrained_mae",
|
| 211 |
-
"winner_mae": 0.031424293689945516,
|
| 212 |
-
"constrained_beats_best_single": true
|
| 213 |
-
}
|
| 214 |
-
},
|
| 215 |
-
"wins": {
|
| 216 |
-
"constrained": 2,
|
| 217 |
-
"best_single": 4,
|
| 218 |
-
"equal": 0,
|
| 219 |
-
"naive": 0
|
| 220 |
-
},
|
| 221 |
-
"summary": {
|
| 222 |
-
"total_target_horizon_cells": 6,
|
| 223 |
-
"constrained_stacking_wins": 2,
|
| 224 |
-
"constrained_beats_best_single_cells": 2
|
| 225 |
-
},
|
| 226 |
-
"elapsed_min": 2.2175209800402325
|
| 227 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"description": "Per-point Bates-Granger constrained stacking on real forecaster outputs. No synthesized folds.",
|
| 3 |
+
"per_target_horizon": {
|
| 4 |
+
"DCOILWTICO_h7": {
|
| 5 |
+
"n_cal_points": 70,
|
| 6 |
+
"n_test_points": 70,
|
| 7 |
+
"individual_mae": {
|
| 8 |
+
"chronos": 3.006047764369419,
|
| 9 |
+
"arima": 3.0841361525087674,
|
| 10 |
+
"prophet": 8.557134422551027,
|
| 11 |
+
"naive": 2.839285714285714
|
| 12 |
+
},
|
| 13 |
+
"stacking_mae": {
|
| 14 |
+
"equal": 3.381860717562512,
|
| 15 |
+
"best_on_cal": 2.839285714285714,
|
| 16 |
+
"constrained_mae": 2.839285714285714,
|
| 17 |
+
"constrained_mse": 2.839285714285714
|
| 18 |
+
},
|
| 19 |
+
"weights": {
|
| 20 |
+
"constrained_mae": {
|
| 21 |
+
"chronos": 0.0,
|
| 22 |
+
"arima": 3.8857805861880464e-16,
|
| 23 |
+
"prophet": 0.0,
|
| 24 |
+
"naive": 0.9999999999999996
|
| 25 |
+
},
|
| 26 |
+
"constrained_mse": {
|
| 27 |
+
"chronos": 1.2281842209915794e-15,
|
| 28 |
+
"arima": 1.7069679003611782e-15,
|
| 29 |
+
"prophet": 6.824272182231614e-17,
|
| 30 |
+
"naive": 0.999999999999997
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"best_single_model": "naive",
|
| 34 |
+
"best_single_mae": 2.839285714285714,
|
| 35 |
+
"winner_method": "naive",
|
| 36 |
+
"winner_mae": 2.839285714285714,
|
| 37 |
+
"constrained_beats_best_single": false
|
| 38 |
+
},
|
| 39 |
+
"DCOILWTICO_h14": {
|
| 40 |
+
"n_cal_points": 140,
|
| 41 |
+
"n_test_points": 140,
|
| 42 |
+
"individual_mae": {
|
| 43 |
+
"chronos": 3.797937408447266,
|
| 44 |
+
"arima": 3.917782537843266,
|
| 45 |
+
"prophet": 9.218187229009528,
|
| 46 |
+
"naive": 3.6239285714285714
|
| 47 |
+
},
|
| 48 |
+
"stacking_mae": {
|
| 49 |
+
"equal": 3.9604401984158755,
|
| 50 |
+
"best_on_cal": 3.6239285714285714,
|
| 51 |
+
"constrained_mae": 3.623928571428571,
|
| 52 |
+
"constrained_mse": 3.6994484688718305
|
| 53 |
+
},
|
| 54 |
+
"weights": {
|
| 55 |
+
"constrained_mae": {
|
| 56 |
+
"chronos": 1.3877787807814454e-16,
|
| 57 |
+
"arima": 0.0,
|
| 58 |
+
"prophet": 0.0,
|
| 59 |
+
"naive": 0.9999999999999998
|
| 60 |
+
},
|
| 61 |
+
"constrained_mse": {
|
| 62 |
+
"chronos": 3.0753177782116836e-14,
|
| 63 |
+
"arima": 0.25973397692659406,
|
| 64 |
+
"prophet": 1.0636618946679322e-15,
|
| 65 |
+
"naive": 0.7402660230733741
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"best_single_model": "naive",
|
| 69 |
+
"best_single_mae": 3.6239285714285714,
|
| 70 |
+
"winner_method": "constrained_mae",
|
| 71 |
+
"winner_mae": 3.623928571428571,
|
| 72 |
+
"constrained_beats_best_single": true
|
| 73 |
+
},
|
| 74 |
+
"DEXUSEU_h7": {
|
| 75 |
+
"n_cal_points": 70,
|
| 76 |
+
"n_test_points": 70,
|
| 77 |
+
"individual_mae": {
|
| 78 |
+
"chronos": 0.00997808286394391,
|
| 79 |
+
"arima": 0.00909829887487626,
|
| 80 |
+
"prophet": 0.04588529230089117,
|
| 81 |
+
"naive": 0.009057142857142856
|
| 82 |
+
},
|
| 83 |
+
"stacking_mae": {
|
| 84 |
+
"equal": 0.013885443002327432,
|
| 85 |
+
"best_on_cal": 0.00997808286394391,
|
| 86 |
+
"constrained_mae": 0.009495985176023706,
|
| 87 |
+
"constrained_mse": 0.013885443002327432
|
| 88 |
+
},
|
| 89 |
+
"weights": {
|
| 90 |
+
"constrained_mae": {
|
| 91 |
+
"chronos": 0.3382904222928093,
|
| 92 |
+
"arima": 0.2908333034179931,
|
| 93 |
+
"prophet": 0.07824807605162067,
|
| 94 |
+
"naive": 0.292628198237577
|
| 95 |
+
},
|
| 96 |
+
"constrained_mse": {
|
| 97 |
+
"chronos": 0.25,
|
| 98 |
+
"arima": 0.25,
|
| 99 |
+
"prophet": 0.25,
|
| 100 |
+
"naive": 0.25
|
| 101 |
+
}
|
| 102 |
+
},
|
| 103 |
+
"best_single_model": "naive",
|
| 104 |
+
"best_single_mae": 0.009057142857142856,
|
| 105 |
+
"winner_method": "naive",
|
| 106 |
+
"winner_mae": 0.009057142857142856,
|
| 107 |
+
"constrained_beats_best_single": false
|
| 108 |
+
},
|
| 109 |
+
"DEXUSEU_h14": {
|
| 110 |
+
"n_cal_points": 140,
|
| 111 |
+
"n_test_points": 140,
|
| 112 |
+
"individual_mae": {
|
| 113 |
+
"chronos": 0.013727861084256852,
|
| 114 |
+
"arima": 0.012013652348349491,
|
| 115 |
+
"prophet": 0.04736957874192551,
|
| 116 |
+
"naive": 0.01203071428571428
|
| 117 |
+
},
|
| 118 |
+
"stacking_mae": {
|
| 119 |
+
"equal": 0.015656730784239885,
|
| 120 |
+
"best_on_cal": 0.012013652348349491,
|
| 121 |
+
"constrained_mae": 0.012635021721737227,
|
| 122 |
+
"constrained_mse": 0.015656730784239885
|
| 123 |
+
},
|
| 124 |
+
"weights": {
|
| 125 |
+
"constrained_mae": {
|
| 126 |
+
"chronos": 0.3173041077741453,
|
| 127 |
+
"arima": 0.2850093471133051,
|
| 128 |
+
"prophet": 0.10822240332468126,
|
| 129 |
+
"naive": 0.28946414178786833
|
| 130 |
+
},
|
| 131 |
+
"constrained_mse": {
|
| 132 |
+
"chronos": 0.25,
|
| 133 |
+
"arima": 0.25,
|
| 134 |
+
"prophet": 0.25,
|
| 135 |
+
"naive": 0.25
|
| 136 |
+
}
|
| 137 |
+
},
|
| 138 |
+
"best_single_model": "arima",
|
| 139 |
+
"best_single_mae": 0.012013652348349491,
|
| 140 |
+
"winner_method": "arima",
|
| 141 |
+
"winner_mae": 0.012013652348349491,
|
| 142 |
+
"constrained_beats_best_single": false
|
| 143 |
+
},
|
| 144 |
+
"DEXCHUS_h7": {
|
| 145 |
+
"n_cal_points": 70,
|
| 146 |
+
"n_test_points": 70,
|
| 147 |
+
"individual_mae": {
|
| 148 |
+
"chronos": 0.019519044701712434,
|
| 149 |
+
"arima": 0.017992622791365688,
|
| 150 |
+
"prophet": 0.11663701396527856,
|
| 151 |
+
"naive": 0.01873000000000015
|
| 152 |
+
},
|
| 153 |
+
"stacking_mae": {
|
| 154 |
+
"equal": 0.03595753473515902,
|
| 155 |
+
"best_on_cal": 0.019519044701712434,
|
| 156 |
+
"constrained_mae": 0.020133491932037322,
|
| 157 |
+
"constrained_mse": 0.019334668170698382
|
| 158 |
+
},
|
| 159 |
+
"weights": {
|
| 160 |
+
"constrained_mae": {
|
| 161 |
+
"chronos": 0.7133898921965662,
|
| 162 |
+
"arima": 0.21870528495965705,
|
| 163 |
+
"prophet": 0.06790482284377684,
|
| 164 |
+
"naive": 0.0
|
| 165 |
+
},
|
| 166 |
+
"constrained_mse": {
|
| 167 |
+
"chronos": 0.935153684195057,
|
| 168 |
+
"arima": 8.998878031629688e-18,
|
| 169 |
+
"prophet": 0.008348340456592942,
|
| 170 |
+
"naive": 0.056497975348350146
|
| 171 |
+
}
|
| 172 |
+
},
|
| 173 |
+
"best_single_model": "arima",
|
| 174 |
+
"best_single_mae": 0.017992622791365688,
|
| 175 |
+
"winner_method": "arima",
|
| 176 |
+
"winner_mae": 0.017992622791365688,
|
| 177 |
+
"constrained_beats_best_single": false
|
| 178 |
+
},
|
| 179 |
+
"DEXCHUS_h14": {
|
| 180 |
+
"n_cal_points": 140,
|
| 181 |
+
"n_test_points": 140,
|
| 182 |
+
"individual_mae": {
|
| 183 |
+
"chronos": 0.03237065534319195,
|
| 184 |
+
"arima": 0.03236972869761379,
|
| 185 |
+
"prophet": 0.12129274215959333,
|
| 186 |
+
"naive": 0.03212142857142869
|
| 187 |
+
},
|
| 188 |
+
"stacking_mae": {
|
| 189 |
+
"equal": 0.043605583896191145,
|
| 190 |
+
"best_on_cal": 0.03237065534319195,
|
| 191 |
+
"constrained_mae": 0.031424293689945516,
|
| 192 |
+
"constrained_mse": 0.034848071305054344
|
| 193 |
+
},
|
| 194 |
+
"weights": {
|
| 195 |
+
"constrained_mae": {
|
| 196 |
+
"chronos": 0.6699556648170705,
|
| 197 |
+
"arima": 0.251108263144011,
|
| 198 |
+
"prophet": 0.07893607203891846,
|
| 199 |
+
"naive": 6.03983418880819e-19
|
| 200 |
+
},
|
| 201 |
+
"constrained_mse": {
|
| 202 |
+
"chronos": 0.8500735106653095,
|
| 203 |
+
"arima": 0.0,
|
| 204 |
+
"prophet": 0.14992648933469047,
|
| 205 |
+
"naive": 0.0
|
| 206 |
+
}
|
| 207 |
+
},
|
| 208 |
+
"best_single_model": "naive",
|
| 209 |
+
"best_single_mae": 0.03212142857142869,
|
| 210 |
+
"winner_method": "constrained_mae",
|
| 211 |
+
"winner_mae": 0.031424293689945516,
|
| 212 |
+
"constrained_beats_best_single": true
|
| 213 |
+
}
|
| 214 |
+
},
|
| 215 |
+
"wins": {
|
| 216 |
+
"constrained": 2,
|
| 217 |
+
"best_single": 4,
|
| 218 |
+
"equal": 0,
|
| 219 |
+
"naive": 0
|
| 220 |
+
},
|
| 221 |
+
"summary": {
|
| 222 |
+
"total_target_horizon_cells": 6,
|
| 223 |
+
"constrained_stacking_wins": 2,
|
| 224 |
+
"constrained_beats_best_single_cells": 2
|
| 225 |
+
},
|
| 226 |
+
"elapsed_min": 2.2175209800402325
|
| 227 |
}
|
FINAL_SUBMIT/receipts/R3_TIMESFM_QUANTILE.json
CHANGED
|
@@ -1,130 +1,130 @@
|
|
| 1 |
-
{
|
| 2 |
-
"method": "per-horizon split-conformal wrapper on TimesFM point forecasts",
|
| 3 |
-
"comparison": "Chronos-Bolt native quantiles",
|
| 4 |
-
"targets": {
|
| 5 |
-
"DCOILWTICO": {
|
| 6 |
-
"target": "DCOILWTICO",
|
| 7 |
-
"n_cal": 20,
|
| 8 |
-
"n_test": 20,
|
| 9 |
-
"timesfm_conf=0.8": {
|
| 10 |
-
"nominal_coverage": 0.8,
|
| 11 |
-
"empirical_coverage": 0.7464285714285714,
|
| 12 |
-
"mean_width": 11.44973765781948,
|
| 13 |
-
"dev_from_nominal": 0.0535714285714286
|
| 14 |
-
},
|
| 15 |
-
"timesfm_conf=0.9": {
|
| 16 |
-
"nominal_coverage": 0.9,
|
| 17 |
-
"empirical_coverage": 0.8321428571428573,
|
| 18 |
-
"mean_width": 14.322232644217351,
|
| 19 |
-
"dev_from_nominal": 0.06785714285714273
|
| 20 |
-
},
|
| 21 |
-
"timesfm_conf=0.95": {
|
| 22 |
-
"nominal_coverage": 0.95,
|
| 23 |
-
"empirical_coverage": 0.9,
|
| 24 |
-
"mean_width": 17.292571051461362,
|
| 25 |
-
"dev_from_nominal": 0.04999999999999993
|
| 26 |
-
},
|
| 27 |
-
"chronos_native_conf=0.8": {
|
| 28 |
-
"nominal_coverage": 0.8,
|
| 29 |
-
"empirical_coverage": 0.7107142857142856,
|
| 30 |
-
"mean_width": 10.861018967628478,
|
| 31 |
-
"dev_from_nominal": 0.08928571428571441
|
| 32 |
-
},
|
| 33 |
-
"chronos_native_conf=0.9": {
|
| 34 |
-
"nominal_coverage": 0.9,
|
| 35 |
-
"empirical_coverage": 0.7107142857142856,
|
| 36 |
-
"mean_width": 10.861018967628478,
|
| 37 |
-
"dev_from_nominal": 0.1892857142857144
|
| 38 |
-
},
|
| 39 |
-
"chronos_native_conf=0.95": {
|
| 40 |
-
"nominal_coverage": 0.95,
|
| 41 |
-
"empirical_coverage": 0.7107142857142856,
|
| 42 |
-
"mean_width": 10.861018967628478,
|
| 43 |
-
"dev_from_nominal": 0.23928571428571432
|
| 44 |
-
}
|
| 45 |
-
},
|
| 46 |
-
"DEXJPUS": {
|
| 47 |
-
"target": "DEXJPUS",
|
| 48 |
-
"n_cal": 20,
|
| 49 |
-
"n_test": 20,
|
| 50 |
-
"timesfm_conf=0.8": {
|
| 51 |
-
"nominal_coverage": 0.8,
|
| 52 |
-
"empirical_coverage": 0.7464285714285714,
|
| 53 |
-
"mean_width": 5.831283089773991,
|
| 54 |
-
"dev_from_nominal": 0.0535714285714286
|
| 55 |
-
},
|
| 56 |
-
"timesfm_conf=0.9": {
|
| 57 |
-
"nominal_coverage": 0.9,
|
| 58 |
-
"empirical_coverage": 0.7928571428571428,
|
| 59 |
-
"mean_width": 6.870930001395079,
|
| 60 |
-
"dev_from_nominal": 0.1071428571428572
|
| 61 |
-
},
|
| 62 |
-
"timesfm_conf=0.95": {
|
| 63 |
-
"nominal_coverage": 0.95,
|
| 64 |
-
"empirical_coverage": 0.8035714285714285,
|
| 65 |
-
"mean_width": 7.547866254534036,
|
| 66 |
-
"dev_from_nominal": 0.14642857142857146
|
| 67 |
-
},
|
| 68 |
-
"chronos_native_conf=0.8": {
|
| 69 |
-
"nominal_coverage": 0.8,
|
| 70 |
-
"empirical_coverage": 0.742857142857143,
|
| 71 |
-
"mean_width": 5.904579341411591,
|
| 72 |
-
"dev_from_nominal": 0.05714285714285705
|
| 73 |
-
},
|
| 74 |
-
"chronos_native_conf=0.9": {
|
| 75 |
-
"nominal_coverage": 0.9,
|
| 76 |
-
"empirical_coverage": 0.742857142857143,
|
| 77 |
-
"mean_width": 5.904579341411591,
|
| 78 |
-
"dev_from_nominal": 0.15714285714285703
|
| 79 |
-
},
|
| 80 |
-
"chronos_native_conf=0.95": {
|
| 81 |
-
"nominal_coverage": 0.95,
|
| 82 |
-
"empirical_coverage": 0.742857142857143,
|
| 83 |
-
"mean_width": 5.904579341411591,
|
| 84 |
-
"dev_from_nominal": 0.20714285714285696
|
| 85 |
-
}
|
| 86 |
-
},
|
| 87 |
-
"DEXUSEU": {
|
| 88 |
-
"target": "DEXUSEU",
|
| 89 |
-
"n_cal": 20,
|
| 90 |
-
"n_test": 20,
|
| 91 |
-
"timesfm_conf=0.8": {
|
| 92 |
-
"nominal_coverage": 0.8,
|
| 93 |
-
"empirical_coverage": 0.9071428571428573,
|
| 94 |
-
"mean_width": 0.06282055849347795,
|
| 95 |
-
"dev_from_nominal": 0.1071428571428572
|
| 96 |
-
},
|
| 97 |
-
"timesfm_conf=0.9": {
|
| 98 |
-
"nominal_coverage": 0.9,
|
| 99 |
-
"empirical_coverage": 0.9678571428571429,
|
| 100 |
-
"mean_width": 0.08470568656921382,
|
| 101 |
-
"dev_from_nominal": 0.06785714285714284
|
| 102 |
-
},
|
| 103 |
-
"timesfm_conf=0.95": {
|
| 104 |
-
"nominal_coverage": 0.95,
|
| 105 |
-
"empirical_coverage": 0.9821428571428571,
|
| 106 |
-
"mean_width": 0.09796196365356444,
|
| 107 |
-
"dev_from_nominal": 0.03214285714285714
|
| 108 |
-
},
|
| 109 |
-
"chronos_native_conf=0.8": {
|
| 110 |
-
"nominal_coverage": 0.8,
|
| 111 |
-
"empirical_coverage": 0.7357142857142858,
|
| 112 |
-
"mean_width": 0.03356509944424033,
|
| 113 |
-
"dev_from_nominal": 0.06428571428571428
|
| 114 |
-
},
|
| 115 |
-
"chronos_native_conf=0.9": {
|
| 116 |
-
"nominal_coverage": 0.9,
|
| 117 |
-
"empirical_coverage": 0.7357142857142858,
|
| 118 |
-
"mean_width": 0.03356509944424033,
|
| 119 |
-
"dev_from_nominal": 0.16428571428571426
|
| 120 |
-
},
|
| 121 |
-
"chronos_native_conf=0.95": {
|
| 122 |
-
"nominal_coverage": 0.95,
|
| 123 |
-
"empirical_coverage": 0.7357142857142858,
|
| 124 |
-
"mean_width": 0.03356509944424033,
|
| 125 |
-
"dev_from_nominal": 0.2142857142857142
|
| 126 |
-
}
|
| 127 |
-
}
|
| 128 |
-
},
|
| 129 |
-
"elapsed_min": 0.5109713474909464
|
| 130 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"method": "per-horizon split-conformal wrapper on TimesFM point forecasts",
|
| 3 |
+
"comparison": "Chronos-Bolt native quantiles",
|
| 4 |
+
"targets": {
|
| 5 |
+
"DCOILWTICO": {
|
| 6 |
+
"target": "DCOILWTICO",
|
| 7 |
+
"n_cal": 20,
|
| 8 |
+
"n_test": 20,
|
| 9 |
+
"timesfm_conf=0.8": {
|
| 10 |
+
"nominal_coverage": 0.8,
|
| 11 |
+
"empirical_coverage": 0.7464285714285714,
|
| 12 |
+
"mean_width": 11.44973765781948,
|
| 13 |
+
"dev_from_nominal": 0.0535714285714286
|
| 14 |
+
},
|
| 15 |
+
"timesfm_conf=0.9": {
|
| 16 |
+
"nominal_coverage": 0.9,
|
| 17 |
+
"empirical_coverage": 0.8321428571428573,
|
| 18 |
+
"mean_width": 14.322232644217351,
|
| 19 |
+
"dev_from_nominal": 0.06785714285714273
|
| 20 |
+
},
|
| 21 |
+
"timesfm_conf=0.95": {
|
| 22 |
+
"nominal_coverage": 0.95,
|
| 23 |
+
"empirical_coverage": 0.9,
|
| 24 |
+
"mean_width": 17.292571051461362,
|
| 25 |
+
"dev_from_nominal": 0.04999999999999993
|
| 26 |
+
},
|
| 27 |
+
"chronos_native_conf=0.8": {
|
| 28 |
+
"nominal_coverage": 0.8,
|
| 29 |
+
"empirical_coverage": 0.7107142857142856,
|
| 30 |
+
"mean_width": 10.861018967628478,
|
| 31 |
+
"dev_from_nominal": 0.08928571428571441
|
| 32 |
+
},
|
| 33 |
+
"chronos_native_conf=0.9": {
|
| 34 |
+
"nominal_coverage": 0.9,
|
| 35 |
+
"empirical_coverage": 0.7107142857142856,
|
| 36 |
+
"mean_width": 10.861018967628478,
|
| 37 |
+
"dev_from_nominal": 0.1892857142857144
|
| 38 |
+
},
|
| 39 |
+
"chronos_native_conf=0.95": {
|
| 40 |
+
"nominal_coverage": 0.95,
|
| 41 |
+
"empirical_coverage": 0.7107142857142856,
|
| 42 |
+
"mean_width": 10.861018967628478,
|
| 43 |
+
"dev_from_nominal": 0.23928571428571432
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"DEXJPUS": {
|
| 47 |
+
"target": "DEXJPUS",
|
| 48 |
+
"n_cal": 20,
|
| 49 |
+
"n_test": 20,
|
| 50 |
+
"timesfm_conf=0.8": {
|
| 51 |
+
"nominal_coverage": 0.8,
|
| 52 |
+
"empirical_coverage": 0.7464285714285714,
|
| 53 |
+
"mean_width": 5.831283089773991,
|
| 54 |
+
"dev_from_nominal": 0.0535714285714286
|
| 55 |
+
},
|
| 56 |
+
"timesfm_conf=0.9": {
|
| 57 |
+
"nominal_coverage": 0.9,
|
| 58 |
+
"empirical_coverage": 0.7928571428571428,
|
| 59 |
+
"mean_width": 6.870930001395079,
|
| 60 |
+
"dev_from_nominal": 0.1071428571428572
|
| 61 |
+
},
|
| 62 |
+
"timesfm_conf=0.95": {
|
| 63 |
+
"nominal_coverage": 0.95,
|
| 64 |
+
"empirical_coverage": 0.8035714285714285,
|
| 65 |
+
"mean_width": 7.547866254534036,
|
| 66 |
+
"dev_from_nominal": 0.14642857142857146
|
| 67 |
+
},
|
| 68 |
+
"chronos_native_conf=0.8": {
|
| 69 |
+
"nominal_coverage": 0.8,
|
| 70 |
+
"empirical_coverage": 0.742857142857143,
|
| 71 |
+
"mean_width": 5.904579341411591,
|
| 72 |
+
"dev_from_nominal": 0.05714285714285705
|
| 73 |
+
},
|
| 74 |
+
"chronos_native_conf=0.9": {
|
| 75 |
+
"nominal_coverage": 0.9,
|
| 76 |
+
"empirical_coverage": 0.742857142857143,
|
| 77 |
+
"mean_width": 5.904579341411591,
|
| 78 |
+
"dev_from_nominal": 0.15714285714285703
|
| 79 |
+
},
|
| 80 |
+
"chronos_native_conf=0.95": {
|
| 81 |
+
"nominal_coverage": 0.95,
|
| 82 |
+
"empirical_coverage": 0.742857142857143,
|
| 83 |
+
"mean_width": 5.904579341411591,
|
| 84 |
+
"dev_from_nominal": 0.20714285714285696
|
| 85 |
+
}
|
| 86 |
+
},
|
| 87 |
+
"DEXUSEU": {
|
| 88 |
+
"target": "DEXUSEU",
|
| 89 |
+
"n_cal": 20,
|
| 90 |
+
"n_test": 20,
|
| 91 |
+
"timesfm_conf=0.8": {
|
| 92 |
+
"nominal_coverage": 0.8,
|
| 93 |
+
"empirical_coverage": 0.9071428571428573,
|
| 94 |
+
"mean_width": 0.06282055849347795,
|
| 95 |
+
"dev_from_nominal": 0.1071428571428572
|
| 96 |
+
},
|
| 97 |
+
"timesfm_conf=0.9": {
|
| 98 |
+
"nominal_coverage": 0.9,
|
| 99 |
+
"empirical_coverage": 0.9678571428571429,
|
| 100 |
+
"mean_width": 0.08470568656921382,
|
| 101 |
+
"dev_from_nominal": 0.06785714285714284
|
| 102 |
+
},
|
| 103 |
+
"timesfm_conf=0.95": {
|
| 104 |
+
"nominal_coverage": 0.95,
|
| 105 |
+
"empirical_coverage": 0.9821428571428571,
|
| 106 |
+
"mean_width": 0.09796196365356444,
|
| 107 |
+
"dev_from_nominal": 0.03214285714285714
|
| 108 |
+
},
|
| 109 |
+
"chronos_native_conf=0.8": {
|
| 110 |
+
"nominal_coverage": 0.8,
|
| 111 |
+
"empirical_coverage": 0.7357142857142858,
|
| 112 |
+
"mean_width": 0.03356509944424033,
|
| 113 |
+
"dev_from_nominal": 0.06428571428571428
|
| 114 |
+
},
|
| 115 |
+
"chronos_native_conf=0.9": {
|
| 116 |
+
"nominal_coverage": 0.9,
|
| 117 |
+
"empirical_coverage": 0.7357142857142858,
|
| 118 |
+
"mean_width": 0.03356509944424033,
|
| 119 |
+
"dev_from_nominal": 0.16428571428571426
|
| 120 |
+
},
|
| 121 |
+
"chronos_native_conf=0.95": {
|
| 122 |
+
"nominal_coverage": 0.95,
|
| 123 |
+
"empirical_coverage": 0.7357142857142858,
|
| 124 |
+
"mean_width": 0.03356509944424033,
|
| 125 |
+
"dev_from_nominal": 0.2142857142857142
|
| 126 |
+
}
|
| 127 |
+
}
|
| 128 |
+
},
|
| 129 |
+
"elapsed_min": 0.5109713474909464
|
| 130 |
}
|
FINAL_SUBMIT/receipts/R4_DANGEROUS_V2.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
FINAL_SUBMIT/receipts/R4_DANGEROUS_V2_ABLATION.json
CHANGED
|
@@ -1,397 +1,397 @@
|
|
| 1 |
-
{
|
| 2 |
-
"description": "R4 ablation: DeepSeek-R1-Q4 reassigned to devil's-advocate (consulted, not voting). Primary consensus = Qwen-14B + Mistral-Nemo.",
|
| 3 |
-
"primary_judges": [
|
| 4 |
-
"qwen25-14b-local",
|
| 5 |
-
"mistral-nemo-local"
|
| 6 |
-
],
|
| 7 |
-
"devils_advocate": "deepseek-r1-local-q4",
|
| 8 |
-
"n_scenarios": 26,
|
| 9 |
-
"agreement_primary_panel": {
|
| 10 |
-
"krippendorff_alpha_ordinal": 0.7499056959637873,
|
| 11 |
-
"cohen_weighted_kappa_qwen_vs_mistral": 0.7473841554559043
|
| 12 |
-
},
|
| 13 |
-
"accuracy_vs_ground_truth": {
|
| 14 |
-
"primary_majority_vote": {
|
| 15 |
-
"correct": 16,
|
| 16 |
-
"total": 26,
|
| 17 |
-
"accuracy": 0.6153846153846154
|
| 18 |
-
},
|
| 19 |
-
"three_judge_majority_vote_ORIGINAL": {
|
| 20 |
-
"correct": 18,
|
| 21 |
-
"total": 26,
|
| 22 |
-
"accuracy": 0.6923076923076923
|
| 23 |
-
},
|
| 24 |
-
"devils_advocate_deepseek": {
|
| 25 |
-
"correct": 8,
|
| 26 |
-
"total": 26,
|
| 27 |
-
"accuracy": 0.3076923076923077
|
| 28 |
-
}
|
| 29 |
-
},
|
| 30 |
-
"confusion_matrix_primary": [
|
| 31 |
-
[
|
| 32 |
-
7,
|
| 33 |
-
0,
|
| 34 |
-
0,
|
| 35 |
-
0
|
| 36 |
-
],
|
| 37 |
-
[
|
| 38 |
-
2,
|
| 39 |
-
5,
|
| 40 |
-
0,
|
| 41 |
-
0
|
| 42 |
-
],
|
| 43 |
-
[
|
| 44 |
-
0,
|
| 45 |
-
5,
|
| 46 |
-
3,
|
| 47 |
-
1
|
| 48 |
-
],
|
| 49 |
-
[
|
| 50 |
-
0,
|
| 51 |
-
0,
|
| 52 |
-
2,
|
| 53 |
-
1
|
| 54 |
-
]
|
| 55 |
-
],
|
| 56 |
-
"confusion_matrix_three_judge_ORIGINAL": [
|
| 57 |
-
[
|
| 58 |
-
7,
|
| 59 |
-
0,
|
| 60 |
-
0,
|
| 61 |
-
0
|
| 62 |
-
],
|
| 63 |
-
[
|
| 64 |
-
2,
|
| 65 |
-
3,
|
| 66 |
-
2,
|
| 67 |
-
0
|
| 68 |
-
],
|
| 69 |
-
[
|
| 70 |
-
0,
|
| 71 |
-
2,
|
| 72 |
-
7,
|
| 73 |
-
0
|
| 74 |
-
],
|
| 75 |
-
[
|
| 76 |
-
0,
|
| 77 |
-
0,
|
| 78 |
-
2,
|
| 79 |
-
1
|
| 80 |
-
]
|
| 81 |
-
],
|
| 82 |
-
"calibration_ece_primary": 0.2894230769230769,
|
| 83 |
-
"per_scenario": {
|
| 84 |
-
"2011_T\u014dhoku_earthquake_and_tsunami": {
|
| 85 |
-
"ground_truth": "CRITICAL",
|
| 86 |
-
"primary_panel_ratings": [
|
| 87 |
-
4,
|
| 88 |
-
4
|
| 89 |
-
],
|
| 90 |
-
"primary_majority": "CRITICAL",
|
| 91 |
-
"devil_rating": "HIGH",
|
| 92 |
-
"three_judge_majority": "CRITICAL",
|
| 93 |
-
"primary_correct": true,
|
| 94 |
-
"devil_correct": false
|
| 95 |
-
},
|
| 96 |
-
"2020\u20132023_global_chip_shortage": {
|
| 97 |
-
"ground_truth": "CRITICAL",
|
| 98 |
-
"primary_panel_ratings": [
|
| 99 |
-
3,
|
| 100 |
-
3
|
| 101 |
-
],
|
| 102 |
-
"primary_majority": "HIGH",
|
| 103 |
-
"devil_rating": "CRITICAL",
|
| 104 |
-
"three_judge_majority": "HIGH",
|
| 105 |
-
"primary_correct": false,
|
| 106 |
-
"devil_correct": true
|
| 107 |
-
},
|
| 108 |
-
"2021_Suez_Canal_obstruction": {
|
| 109 |
-
"ground_truth": "HIGH",
|
| 110 |
-
"primary_panel_ratings": [
|
| 111 |
-
3,
|
| 112 |
-
3
|
| 113 |
-
],
|
| 114 |
-
"primary_majority": "HIGH",
|
| 115 |
-
"devil_rating": "HIGH",
|
| 116 |
-
"three_judge_majority": "HIGH",
|
| 117 |
-
"primary_correct": true,
|
| 118 |
-
"devil_correct": true
|
| 119 |
-
},
|
| 120 |
-
"Bab-el-Mandeb": {
|
| 121 |
-
"ground_truth": "HIGH",
|
| 122 |
-
"primary_panel_ratings": [
|
| 123 |
-
2,
|
| 124 |
-
1
|
| 125 |
-
],
|
| 126 |
-
"primary_majority": "MEDIUM",
|
| 127 |
-
"devil_rating": "HIGH",
|
| 128 |
-
"three_judge_majority": "MEDIUM",
|
| 129 |
-
"primary_correct": false,
|
| 130 |
-
"devil_correct": true
|
| 131 |
-
},
|
| 132 |
-
"Baltic_Dry_Index": {
|
| 133 |
-
"ground_truth": "LOW",
|
| 134 |
-
"primary_panel_ratings": [
|
| 135 |
-
1,
|
| 136 |
-
1
|
| 137 |
-
],
|
| 138 |
-
"primary_majority": "LOW",
|
| 139 |
-
"devil_rating": "HIGH",
|
| 140 |
-
"three_judge_majority": "LOW",
|
| 141 |
-
"primary_correct": true,
|
| 142 |
-
"devil_correct": false
|
| 143 |
-
},
|
| 144 |
-
"Bullwhip_effect": {
|
| 145 |
-
"ground_truth": "MEDIUM",
|
| 146 |
-
"primary_panel_ratings": [
|
| 147 |
-
1,
|
| 148 |
-
1
|
| 149 |
-
],
|
| 150 |
-
"primary_majority": "LOW",
|
| 151 |
-
"devil_rating": "HIGH",
|
| 152 |
-
"three_judge_majority": "LOW",
|
| 153 |
-
"primary_correct": false,
|
| 154 |
-
"devil_correct": false
|
| 155 |
-
},
|
| 156 |
-
"CHIPS_and_Science_Act": {
|
| 157 |
-
"ground_truth": "MEDIUM",
|
| 158 |
-
"primary_panel_ratings": [
|
| 159 |
-
1,
|
| 160 |
-
2
|
| 161 |
-
],
|
| 162 |
-
"primary_majority": "MEDIUM",
|
| 163 |
-
"devil_rating": "HIGH",
|
| 164 |
-
"three_judge_majority": "MEDIUM",
|
| 165 |
-
"primary_correct": true,
|
| 166 |
-
"devil_correct": false
|
| 167 |
-
},
|
| 168 |
-
"Container_ship": {
|
| 169 |
-
"ground_truth": "LOW",
|
| 170 |
-
"primary_panel_ratings": [
|
| 171 |
-
1,
|
| 172 |
-
1
|
| 173 |
-
],
|
| 174 |
-
"primary_majority": "LOW",
|
| 175 |
-
"devil_rating": "HIGH",
|
| 176 |
-
"three_judge_majority": "LOW",
|
| 177 |
-
"primary_correct": true,
|
| 178 |
-
"devil_correct": false
|
| 179 |
-
},
|
| 180 |
-
"Enterprise_resource_planning": {
|
| 181 |
-
"ground_truth": "LOW",
|
| 182 |
-
"primary_panel_ratings": [
|
| 183 |
-
1,
|
| 184 |
-
1
|
| 185 |
-
],
|
| 186 |
-
"primary_majority": "LOW",
|
| 187 |
-
"devil_rating": "MEDIUM",
|
| 188 |
-
"three_judge_majority": "LOW",
|
| 189 |
-
"primary_correct": true,
|
| 190 |
-
"devil_correct": false
|
| 191 |
-
},
|
| 192 |
-
"Ever_Given": {
|
| 193 |
-
"ground_truth": "HIGH",
|
| 194 |
-
"primary_panel_ratings": [
|
| 195 |
-
2,
|
| 196 |
-
3
|
| 197 |
-
],
|
| 198 |
-
"primary_majority": "MEDIUM",
|
| 199 |
-
"devil_rating": "HIGH",
|
| 200 |
-
"three_judge_majority": "HIGH",
|
| 201 |
-
"primary_correct": false,
|
| 202 |
-
"devil_correct": true
|
| 203 |
-
},
|
| 204 |
-
"Foxconn": {
|
| 205 |
-
"ground_truth": "MEDIUM",
|
| 206 |
-
"primary_panel_ratings": [
|
| 207 |
-
3,
|
| 208 |
-
2
|
| 209 |
-
],
|
| 210 |
-
"primary_majority": "MEDIUM",
|
| 211 |
-
"devil_rating": "HIGH",
|
| 212 |
-
"three_judge_majority": "HIGH",
|
| 213 |
-
"primary_correct": true,
|
| 214 |
-
"devil_correct": false
|
| 215 |
-
},
|
| 216 |
-
"Inventory": {
|
| 217 |
-
"ground_truth": "LOW",
|
| 218 |
-
"primary_panel_ratings": [
|
| 219 |
-
1,
|
| 220 |
-
1
|
| 221 |
-
],
|
| 222 |
-
"primary_majority": "LOW",
|
| 223 |
-
"devil_rating": "HIGH",
|
| 224 |
-
"three_judge_majority": "LOW",
|
| 225 |
-
"primary_correct": true,
|
| 226 |
-
"devil_correct": false
|
| 227 |
-
},
|
| 228 |
-
"Just-in-time_manufacturing": {
|
| 229 |
-
"ground_truth": "MEDIUM",
|
| 230 |
-
"primary_panel_ratings": [
|
| 231 |
-
1,
|
| 232 |
-
1
|
| 233 |
-
],
|
| 234 |
-
"primary_majority": "LOW",
|
| 235 |
-
"devil_rating": "HIGH",
|
| 236 |
-
"three_judge_majority": "LOW",
|
| 237 |
-
"primary_correct": false,
|
| 238 |
-
"devil_correct": false
|
| 239 |
-
},
|
| 240 |
-
"Logistics": {
|
| 241 |
-
"ground_truth": "LOW",
|
| 242 |
-
"primary_panel_ratings": [
|
| 243 |
-
1,
|
| 244 |
-
1
|
| 245 |
-
],
|
| 246 |
-
"primary_majority": "LOW",
|
| 247 |
-
"devil_rating": "HIGH",
|
| 248 |
-
"three_judge_majority": "LOW",
|
| 249 |
-
"primary_correct": true,
|
| 250 |
-
"devil_correct": false
|
| 251 |
-
},
|
| 252 |
-
"Port_of_Los_Angeles": {
|
| 253 |
-
"ground_truth": "MEDIUM",
|
| 254 |
-
"primary_panel_ratings": [
|
| 255 |
-
2,
|
| 256 |
-
2
|
| 257 |
-
],
|
| 258 |
-
"primary_majority": "MEDIUM",
|
| 259 |
-
"devil_rating": "HIGH",
|
| 260 |
-
"three_judge_majority": "MEDIUM",
|
| 261 |
-
"primary_correct": true,
|
| 262 |
-
"devil_correct": false
|
| 263 |
-
},
|
| 264 |
-
"Port_of_Singapore": {
|
| 265 |
-
"ground_truth": "MEDIUM",
|
| 266 |
-
"primary_panel_ratings": [
|
| 267 |
-
3,
|
| 268 |
-
2
|
| 269 |
-
],
|
| 270 |
-
"primary_majority": "MEDIUM",
|
| 271 |
-
"devil_rating": "HIGH",
|
| 272 |
-
"three_judge_majority": "HIGH",
|
| 273 |
-
"primary_correct": true,
|
| 274 |
-
"devil_correct": false
|
| 275 |
-
},
|
| 276 |
-
"Red_Sea_crisis": {
|
| 277 |
-
"ground_truth": "CRITICAL",
|
| 278 |
-
"primary_panel_ratings": [
|
| 279 |
-
3,
|
| 280 |
-
3
|
| 281 |
-
],
|
| 282 |
-
"primary_majority": "HIGH",
|
| 283 |
-
"devil_rating": "CRITICAL",
|
| 284 |
-
"three_judge_majority": "HIGH",
|
| 285 |
-
"primary_correct": false,
|
| 286 |
-
"devil_correct": true
|
| 287 |
-
},
|
| 288 |
-
"Samsung_Electronics": {
|
| 289 |
-
"ground_truth": "MEDIUM",
|
| 290 |
-
"primary_panel_ratings": [
|
| 291 |
-
2,
|
| 292 |
-
1
|
| 293 |
-
],
|
| 294 |
-
"primary_majority": "MEDIUM",
|
| 295 |
-
"devil_rating": "HIGH",
|
| 296 |
-
"three_judge_majority": "MEDIUM",
|
| 297 |
-
"primary_correct": true,
|
| 298 |
-
"devil_correct": false
|
| 299 |
-
},
|
| 300 |
-
"Semiconductor_industry": {
|
| 301 |
-
"ground_truth": "HIGH",
|
| 302 |
-
"primary_panel_ratings": [
|
| 303 |
-
2,
|
| 304 |
-
1
|
| 305 |
-
],
|
| 306 |
-
"primary_majority": "MEDIUM",
|
| 307 |
-
"devil_rating": "CRITICAL",
|
| 308 |
-
"three_judge_majority": "MEDIUM",
|
| 309 |
-
"primary_correct": false,
|
| 310 |
-
"devil_correct": false
|
| 311 |
-
},
|
| 312 |
-
"Strait_of_Hormuz": {
|
| 313 |
-
"ground_truth": "HIGH",
|
| 314 |
-
"primary_panel_ratings": [
|
| 315 |
-
4,
|
| 316 |
-
3
|
| 317 |
-
],
|
| 318 |
-
"primary_majority": "CRITICAL",
|
| 319 |
-
"devil_rating": "HIGH",
|
| 320 |
-
"three_judge_majority": "HIGH",
|
| 321 |
-
"primary_correct": false,
|
| 322 |
-
"devil_correct": true
|
| 323 |
-
},
|
| 324 |
-
"Strait_of_Malacca": {
|
| 325 |
-
"ground_truth": "HIGH",
|
| 326 |
-
"primary_panel_ratings": [
|
| 327 |
-
3,
|
| 328 |
-
3
|
| 329 |
-
],
|
| 330 |
-
"primary_majority": "HIGH",
|
| 331 |
-
"devil_rating": "HIGH",
|
| 332 |
-
"three_judge_majority": "HIGH",
|
| 333 |
-
"primary_correct": true,
|
| 334 |
-
"devil_correct": true
|
| 335 |
-
},
|
| 336 |
-
"Suez_Canal": {
|
| 337 |
-
"ground_truth": "HIGH",
|
| 338 |
-
"primary_panel_ratings": [
|
| 339 |
-
3,
|
| 340 |
-
1
|
| 341 |
-
],
|
| 342 |
-
"primary_majority": "MEDIUM",
|
| 343 |
-
"devil_rating": "CRITICAL",
|
| 344 |
-
"three_judge_majority": "HIGH",
|
| 345 |
-
"primary_correct": false,
|
| 346 |
-
"devil_correct": false
|
| 347 |
-
},
|
| 348 |
-
"Supply_chain_attack": {
|
| 349 |
-
"ground_truth": "HIGH",
|
| 350 |
-
"primary_panel_ratings": [
|
| 351 |
-
2,
|
| 352 |
-
3
|
| 353 |
-
],
|
| 354 |
-
"primary_majority": "MEDIUM",
|
| 355 |
-
"devil_rating": "CRITICAL",
|
| 356 |
-
"three_judge_majority": "HIGH",
|
| 357 |
-
"primary_correct": false,
|
| 358 |
-
"devil_correct": false
|
| 359 |
-
},
|
| 360 |
-
"Supply_chain_management": {
|
| 361 |
-
"ground_truth": "LOW",
|
| 362 |
-
"primary_panel_ratings": [
|
| 363 |
-
1,
|
| 364 |
-
1
|
| 365 |
-
],
|
| 366 |
-
"primary_majority": "LOW",
|
| 367 |
-
"devil_rating": "HIGH",
|
| 368 |
-
"three_judge_majority": "LOW",
|
| 369 |
-
"primary_correct": true,
|
| 370 |
-
"devil_correct": false
|
| 371 |
-
},
|
| 372 |
-
"TSMC": {
|
| 373 |
-
"ground_truth": "HIGH",
|
| 374 |
-
"primary_panel_ratings": [
|
| 375 |
-
3,
|
| 376 |
-
3
|
| 377 |
-
],
|
| 378 |
-
"primary_majority": "HIGH",
|
| 379 |
-
"devil_rating": "HIGH",
|
| 380 |
-
"three_judge_majority": "HIGH",
|
| 381 |
-
"primary_correct": true,
|
| 382 |
-
"devil_correct": true
|
| 383 |
-
},
|
| 384 |
-
"Warehouse": {
|
| 385 |
-
"ground_truth": "LOW",
|
| 386 |
-
"primary_panel_ratings": [
|
| 387 |
-
1,
|
| 388 |
-
1
|
| 389 |
-
],
|
| 390 |
-
"primary_majority": "LOW",
|
| 391 |
-
"devil_rating": "MEDIUM",
|
| 392 |
-
"three_judge_majority": "LOW",
|
| 393 |
-
"primary_correct": true,
|
| 394 |
-
"devil_correct": false
|
| 395 |
-
}
|
| 396 |
-
}
|
| 397 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"description": "R4 ablation: DeepSeek-R1-Q4 reassigned to devil's-advocate (consulted, not voting). Primary consensus = Qwen-14B + Mistral-Nemo.",
|
| 3 |
+
"primary_judges": [
|
| 4 |
+
"qwen25-14b-local",
|
| 5 |
+
"mistral-nemo-local"
|
| 6 |
+
],
|
| 7 |
+
"devils_advocate": "deepseek-r1-local-q4",
|
| 8 |
+
"n_scenarios": 26,
|
| 9 |
+
"agreement_primary_panel": {
|
| 10 |
+
"krippendorff_alpha_ordinal": 0.7499056959637873,
|
| 11 |
+
"cohen_weighted_kappa_qwen_vs_mistral": 0.7473841554559043
|
| 12 |
+
},
|
| 13 |
+
"accuracy_vs_ground_truth": {
|
| 14 |
+
"primary_majority_vote": {
|
| 15 |
+
"correct": 16,
|
| 16 |
+
"total": 26,
|
| 17 |
+
"accuracy": 0.6153846153846154
|
| 18 |
+
},
|
| 19 |
+
"three_judge_majority_vote_ORIGINAL": {
|
| 20 |
+
"correct": 18,
|
| 21 |
+
"total": 26,
|
| 22 |
+
"accuracy": 0.6923076923076923
|
| 23 |
+
},
|
| 24 |
+
"devils_advocate_deepseek": {
|
| 25 |
+
"correct": 8,
|
| 26 |
+
"total": 26,
|
| 27 |
+
"accuracy": 0.3076923076923077
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"confusion_matrix_primary": [
|
| 31 |
+
[
|
| 32 |
+
7,
|
| 33 |
+
0,
|
| 34 |
+
0,
|
| 35 |
+
0
|
| 36 |
+
],
|
| 37 |
+
[
|
| 38 |
+
2,
|
| 39 |
+
5,
|
| 40 |
+
0,
|
| 41 |
+
0
|
| 42 |
+
],
|
| 43 |
+
[
|
| 44 |
+
0,
|
| 45 |
+
5,
|
| 46 |
+
3,
|
| 47 |
+
1
|
| 48 |
+
],
|
| 49 |
+
[
|
| 50 |
+
0,
|
| 51 |
+
0,
|
| 52 |
+
2,
|
| 53 |
+
1
|
| 54 |
+
]
|
| 55 |
+
],
|
| 56 |
+
"confusion_matrix_three_judge_ORIGINAL": [
|
| 57 |
+
[
|
| 58 |
+
7,
|
| 59 |
+
0,
|
| 60 |
+
0,
|
| 61 |
+
0
|
| 62 |
+
],
|
| 63 |
+
[
|
| 64 |
+
2,
|
| 65 |
+
3,
|
| 66 |
+
2,
|
| 67 |
+
0
|
| 68 |
+
],
|
| 69 |
+
[
|
| 70 |
+
0,
|
| 71 |
+
2,
|
| 72 |
+
7,
|
| 73 |
+
0
|
| 74 |
+
],
|
| 75 |
+
[
|
| 76 |
+
0,
|
| 77 |
+
0,
|
| 78 |
+
2,
|
| 79 |
+
1
|
| 80 |
+
]
|
| 81 |
+
],
|
| 82 |
+
"calibration_ece_primary": 0.2894230769230769,
|
| 83 |
+
"per_scenario": {
|
| 84 |
+
"2011_T\u014dhoku_earthquake_and_tsunami": {
|
| 85 |
+
"ground_truth": "CRITICAL",
|
| 86 |
+
"primary_panel_ratings": [
|
| 87 |
+
4,
|
| 88 |
+
4
|
| 89 |
+
],
|
| 90 |
+
"primary_majority": "CRITICAL",
|
| 91 |
+
"devil_rating": "HIGH",
|
| 92 |
+
"three_judge_majority": "CRITICAL",
|
| 93 |
+
"primary_correct": true,
|
| 94 |
+
"devil_correct": false
|
| 95 |
+
},
|
| 96 |
+
"2020\u20132023_global_chip_shortage": {
|
| 97 |
+
"ground_truth": "CRITICAL",
|
| 98 |
+
"primary_panel_ratings": [
|
| 99 |
+
3,
|
| 100 |
+
3
|
| 101 |
+
],
|
| 102 |
+
"primary_majority": "HIGH",
|
| 103 |
+
"devil_rating": "CRITICAL",
|
| 104 |
+
"three_judge_majority": "HIGH",
|
| 105 |
+
"primary_correct": false,
|
| 106 |
+
"devil_correct": true
|
| 107 |
+
},
|
| 108 |
+
"2021_Suez_Canal_obstruction": {
|
| 109 |
+
"ground_truth": "HIGH",
|
| 110 |
+
"primary_panel_ratings": [
|
| 111 |
+
3,
|
| 112 |
+
3
|
| 113 |
+
],
|
| 114 |
+
"primary_majority": "HIGH",
|
| 115 |
+
"devil_rating": "HIGH",
|
| 116 |
+
"three_judge_majority": "HIGH",
|
| 117 |
+
"primary_correct": true,
|
| 118 |
+
"devil_correct": true
|
| 119 |
+
},
|
| 120 |
+
"Bab-el-Mandeb": {
|
| 121 |
+
"ground_truth": "HIGH",
|
| 122 |
+
"primary_panel_ratings": [
|
| 123 |
+
2,
|
| 124 |
+
1
|
| 125 |
+
],
|
| 126 |
+
"primary_majority": "MEDIUM",
|
| 127 |
+
"devil_rating": "HIGH",
|
| 128 |
+
"three_judge_majority": "MEDIUM",
|
| 129 |
+
"primary_correct": false,
|
| 130 |
+
"devil_correct": true
|
| 131 |
+
},
|
| 132 |
+
"Baltic_Dry_Index": {
|
| 133 |
+
"ground_truth": "LOW",
|
| 134 |
+
"primary_panel_ratings": [
|
| 135 |
+
1,
|
| 136 |
+
1
|
| 137 |
+
],
|
| 138 |
+
"primary_majority": "LOW",
|
| 139 |
+
"devil_rating": "HIGH",
|
| 140 |
+
"three_judge_majority": "LOW",
|
| 141 |
+
"primary_correct": true,
|
| 142 |
+
"devil_correct": false
|
| 143 |
+
},
|
| 144 |
+
"Bullwhip_effect": {
|
| 145 |
+
"ground_truth": "MEDIUM",
|
| 146 |
+
"primary_panel_ratings": [
|
| 147 |
+
1,
|
| 148 |
+
1
|
| 149 |
+
],
|
| 150 |
+
"primary_majority": "LOW",
|
| 151 |
+
"devil_rating": "HIGH",
|
| 152 |
+
"three_judge_majority": "LOW",
|
| 153 |
+
"primary_correct": false,
|
| 154 |
+
"devil_correct": false
|
| 155 |
+
},
|
| 156 |
+
"CHIPS_and_Science_Act": {
|
| 157 |
+
"ground_truth": "MEDIUM",
|
| 158 |
+
"primary_panel_ratings": [
|
| 159 |
+
1,
|
| 160 |
+
2
|
| 161 |
+
],
|
| 162 |
+
"primary_majority": "MEDIUM",
|
| 163 |
+
"devil_rating": "HIGH",
|
| 164 |
+
"three_judge_majority": "MEDIUM",
|
| 165 |
+
"primary_correct": true,
|
| 166 |
+
"devil_correct": false
|
| 167 |
+
},
|
| 168 |
+
"Container_ship": {
|
| 169 |
+
"ground_truth": "LOW",
|
| 170 |
+
"primary_panel_ratings": [
|
| 171 |
+
1,
|
| 172 |
+
1
|
| 173 |
+
],
|
| 174 |
+
"primary_majority": "LOW",
|
| 175 |
+
"devil_rating": "HIGH",
|
| 176 |
+
"three_judge_majority": "LOW",
|
| 177 |
+
"primary_correct": true,
|
| 178 |
+
"devil_correct": false
|
| 179 |
+
},
|
| 180 |
+
"Enterprise_resource_planning": {
|
| 181 |
+
"ground_truth": "LOW",
|
| 182 |
+
"primary_panel_ratings": [
|
| 183 |
+
1,
|
| 184 |
+
1
|
| 185 |
+
],
|
| 186 |
+
"primary_majority": "LOW",
|
| 187 |
+
"devil_rating": "MEDIUM",
|
| 188 |
+
"three_judge_majority": "LOW",
|
| 189 |
+
"primary_correct": true,
|
| 190 |
+
"devil_correct": false
|
| 191 |
+
},
|
| 192 |
+
"Ever_Given": {
|
| 193 |
+
"ground_truth": "HIGH",
|
| 194 |
+
"primary_panel_ratings": [
|
| 195 |
+
2,
|
| 196 |
+
3
|
| 197 |
+
],
|
| 198 |
+
"primary_majority": "MEDIUM",
|
| 199 |
+
"devil_rating": "HIGH",
|
| 200 |
+
"three_judge_majority": "HIGH",
|
| 201 |
+
"primary_correct": false,
|
| 202 |
+
"devil_correct": true
|
| 203 |
+
},
|
| 204 |
+
"Foxconn": {
|
| 205 |
+
"ground_truth": "MEDIUM",
|
| 206 |
+
"primary_panel_ratings": [
|
| 207 |
+
3,
|
| 208 |
+
2
|
| 209 |
+
],
|
| 210 |
+
"primary_majority": "MEDIUM",
|
| 211 |
+
"devil_rating": "HIGH",
|
| 212 |
+
"three_judge_majority": "HIGH",
|
| 213 |
+
"primary_correct": true,
|
| 214 |
+
"devil_correct": false
|
| 215 |
+
},
|
| 216 |
+
"Inventory": {
|
| 217 |
+
"ground_truth": "LOW",
|
| 218 |
+
"primary_panel_ratings": [
|
| 219 |
+
1,
|
| 220 |
+
1
|
| 221 |
+
],
|
| 222 |
+
"primary_majority": "LOW",
|
| 223 |
+
"devil_rating": "HIGH",
|
| 224 |
+
"three_judge_majority": "LOW",
|
| 225 |
+
"primary_correct": true,
|
| 226 |
+
"devil_correct": false
|
| 227 |
+
},
|
| 228 |
+
"Just-in-time_manufacturing": {
|
| 229 |
+
"ground_truth": "MEDIUM",
|
| 230 |
+
"primary_panel_ratings": [
|
| 231 |
+
1,
|
| 232 |
+
1
|
| 233 |
+
],
|
| 234 |
+
"primary_majority": "LOW",
|
| 235 |
+
"devil_rating": "HIGH",
|
| 236 |
+
"three_judge_majority": "LOW",
|
| 237 |
+
"primary_correct": false,
|
| 238 |
+
"devil_correct": false
|
| 239 |
+
},
|
| 240 |
+
"Logistics": {
|
| 241 |
+
"ground_truth": "LOW",
|
| 242 |
+
"primary_panel_ratings": [
|
| 243 |
+
1,
|
| 244 |
+
1
|
| 245 |
+
],
|
| 246 |
+
"primary_majority": "LOW",
|
| 247 |
+
"devil_rating": "HIGH",
|
| 248 |
+
"three_judge_majority": "LOW",
|
| 249 |
+
"primary_correct": true,
|
| 250 |
+
"devil_correct": false
|
| 251 |
+
},
|
| 252 |
+
"Port_of_Los_Angeles": {
|
| 253 |
+
"ground_truth": "MEDIUM",
|
| 254 |
+
"primary_panel_ratings": [
|
| 255 |
+
2,
|
| 256 |
+
2
|
| 257 |
+
],
|
| 258 |
+
"primary_majority": "MEDIUM",
|
| 259 |
+
"devil_rating": "HIGH",
|
| 260 |
+
"three_judge_majority": "MEDIUM",
|
| 261 |
+
"primary_correct": true,
|
| 262 |
+
"devil_correct": false
|
| 263 |
+
},
|
| 264 |
+
"Port_of_Singapore": {
|
| 265 |
+
"ground_truth": "MEDIUM",
|
| 266 |
+
"primary_panel_ratings": [
|
| 267 |
+
3,
|
| 268 |
+
2
|
| 269 |
+
],
|
| 270 |
+
"primary_majority": "MEDIUM",
|
| 271 |
+
"devil_rating": "HIGH",
|
| 272 |
+
"three_judge_majority": "HIGH",
|
| 273 |
+
"primary_correct": true,
|
| 274 |
+
"devil_correct": false
|
| 275 |
+
},
|
| 276 |
+
"Red_Sea_crisis": {
|
| 277 |
+
"ground_truth": "CRITICAL",
|
| 278 |
+
"primary_panel_ratings": [
|
| 279 |
+
3,
|
| 280 |
+
3
|
| 281 |
+
],
|
| 282 |
+
"primary_majority": "HIGH",
|
| 283 |
+
"devil_rating": "CRITICAL",
|
| 284 |
+
"three_judge_majority": "HIGH",
|
| 285 |
+
"primary_correct": false,
|
| 286 |
+
"devil_correct": true
|
| 287 |
+
},
|
| 288 |
+
"Samsung_Electronics": {
|
| 289 |
+
"ground_truth": "MEDIUM",
|
| 290 |
+
"primary_panel_ratings": [
|
| 291 |
+
2,
|
| 292 |
+
1
|
| 293 |
+
],
|
| 294 |
+
"primary_majority": "MEDIUM",
|
| 295 |
+
"devil_rating": "HIGH",
|
| 296 |
+
"three_judge_majority": "MEDIUM",
|
| 297 |
+
"primary_correct": true,
|
| 298 |
+
"devil_correct": false
|
| 299 |
+
},
|
| 300 |
+
"Semiconductor_industry": {
|
| 301 |
+
"ground_truth": "HIGH",
|
| 302 |
+
"primary_panel_ratings": [
|
| 303 |
+
2,
|
| 304 |
+
1
|
| 305 |
+
],
|
| 306 |
+
"primary_majority": "MEDIUM",
|
| 307 |
+
"devil_rating": "CRITICAL",
|
| 308 |
+
"three_judge_majority": "MEDIUM",
|
| 309 |
+
"primary_correct": false,
|
| 310 |
+
"devil_correct": false
|
| 311 |
+
},
|
| 312 |
+
"Strait_of_Hormuz": {
|
| 313 |
+
"ground_truth": "HIGH",
|
| 314 |
+
"primary_panel_ratings": [
|
| 315 |
+
4,
|
| 316 |
+
3
|
| 317 |
+
],
|
| 318 |
+
"primary_majority": "CRITICAL",
|
| 319 |
+
"devil_rating": "HIGH",
|
| 320 |
+
"three_judge_majority": "HIGH",
|
| 321 |
+
"primary_correct": false,
|
| 322 |
+
"devil_correct": true
|
| 323 |
+
},
|
| 324 |
+
"Strait_of_Malacca": {
|
| 325 |
+
"ground_truth": "HIGH",
|
| 326 |
+
"primary_panel_ratings": [
|
| 327 |
+
3,
|
| 328 |
+
3
|
| 329 |
+
],
|
| 330 |
+
"primary_majority": "HIGH",
|
| 331 |
+
"devil_rating": "HIGH",
|
| 332 |
+
"three_judge_majority": "HIGH",
|
| 333 |
+
"primary_correct": true,
|
| 334 |
+
"devil_correct": true
|
| 335 |
+
},
|
| 336 |
+
"Suez_Canal": {
|
| 337 |
+
"ground_truth": "HIGH",
|
| 338 |
+
"primary_panel_ratings": [
|
| 339 |
+
3,
|
| 340 |
+
1
|
| 341 |
+
],
|
| 342 |
+
"primary_majority": "MEDIUM",
|
| 343 |
+
"devil_rating": "CRITICAL",
|
| 344 |
+
"three_judge_majority": "HIGH",
|
| 345 |
+
"primary_correct": false,
|
| 346 |
+
"devil_correct": false
|
| 347 |
+
},
|
| 348 |
+
"Supply_chain_attack": {
|
| 349 |
+
"ground_truth": "HIGH",
|
| 350 |
+
"primary_panel_ratings": [
|
| 351 |
+
2,
|
| 352 |
+
3
|
| 353 |
+
],
|
| 354 |
+
"primary_majority": "MEDIUM",
|
| 355 |
+
"devil_rating": "CRITICAL",
|
| 356 |
+
"three_judge_majority": "HIGH",
|
| 357 |
+
"primary_correct": false,
|
| 358 |
+
"devil_correct": false
|
| 359 |
+
},
|
| 360 |
+
"Supply_chain_management": {
|
| 361 |
+
"ground_truth": "LOW",
|
| 362 |
+
"primary_panel_ratings": [
|
| 363 |
+
1,
|
| 364 |
+
1
|
| 365 |
+
],
|
| 366 |
+
"primary_majority": "LOW",
|
| 367 |
+
"devil_rating": "HIGH",
|
| 368 |
+
"three_judge_majority": "LOW",
|
| 369 |
+
"primary_correct": true,
|
| 370 |
+
"devil_correct": false
|
| 371 |
+
},
|
| 372 |
+
"TSMC": {
|
| 373 |
+
"ground_truth": "HIGH",
|
| 374 |
+
"primary_panel_ratings": [
|
| 375 |
+
3,
|
| 376 |
+
3
|
| 377 |
+
],
|
| 378 |
+
"primary_majority": "HIGH",
|
| 379 |
+
"devil_rating": "HIGH",
|
| 380 |
+
"three_judge_majority": "HIGH",
|
| 381 |
+
"primary_correct": true,
|
| 382 |
+
"devil_correct": true
|
| 383 |
+
},
|
| 384 |
+
"Warehouse": {
|
| 385 |
+
"ground_truth": "LOW",
|
| 386 |
+
"primary_panel_ratings": [
|
| 387 |
+
1,
|
| 388 |
+
1
|
| 389 |
+
],
|
| 390 |
+
"primary_majority": "LOW",
|
| 391 |
+
"devil_rating": "MEDIUM",
|
| 392 |
+
"three_judge_majority": "LOW",
|
| 393 |
+
"primary_correct": true,
|
| 394 |
+
"devil_correct": false
|
| 395 |
+
}
|
| 396 |
+
}
|
| 397 |
}
|
FINAL_SUBMIT/receipts/R4_DANGEROUS_V2_LIVE.json
CHANGED
|
@@ -1,64 +1,64 @@
|
|
| 1 |
-
{
|
| 2 |
-
"scenario": "Red_Sea_crisis",
|
| 3 |
-
"ground_truth": "CRITICAL",
|
| 4 |
-
"per_judge": {
|
| 5 |
-
"qwen25-14b-local": {
|
| 6 |
-
"risk_level": "CRITICAL",
|
| 7 |
-
"parsed": {
|
| 8 |
-
"risk_level": "CRITICAL",
|
| 9 |
-
"confidence": 0.9,
|
| 10 |
-
"primary_vulnerabilities": [
|
| 11 |
-
"Increased threat to maritime shipping in the Red Sea and Bab-el-Mandeb Strait.",
|
| 12 |
-
"Potential for further escalation involving multiple countries including Iran, Israel, US, UK, and Yemen.",
|
| 13 |
-
"Disruption of global trade routes through the Suez Canal."
|
| 14 |
-
],
|
| 15 |
-
"mitigations": [
|
| 16 |
-
"Enhanced naval patrols by international coalitions to protect shipping lanes.",
|
| 17 |
-
"Development of alternative shipping routes or rerouting strategies for high-risk areas.",
|
| 18 |
-
"Strengthened diplomatic efforts to negotiate a long-term ceasefire and resolution."
|
| 19 |
-
],
|
| 20 |
-
"reasoning_one_line": "Resumption of Houthi attacks on Israel amid the Iran war poses severe threats to global maritime security and trade stability.",
|
| 21 |
-
"time_sensitivity": "VOLATILE"
|
| 22 |
-
},
|
| 23 |
-
"correct": true,
|
| 24 |
-
"latency_s": 21.539926052093506,
|
| 25 |
-
"raw_preview": "{\n \"risk_level\": \"CRITICAL\",\n \"confidence\": 0.9,\n \"primary_vulnerabilities\": [\n \"Increased threat to maritime shipping in the Red Sea and Bab-el-Mandeb Strait.\",\n \"Potential for further escalation involving multiple countries including Iran, Israel, US, UK, and Yemen.\",\n \"Disruption of global trade routes through the Suez Canal.\"\n ],\n \"mitigations\": [\n \"Enhanced naval patrols by i"
|
| 26 |
-
},
|
| 27 |
-
"mistral-nemo-local": {
|
| 28 |
-
"risk_level": "HIGH",
|
| 29 |
-
"parsed": {
|
| 30 |
-
"risk_level": "HIGH",
|
| 31 |
-
"confidence": 0.95,
|
| 32 |
-
"primary_vulnerabilities": [
|
| 33 |
-
"Maritime traffic disruption in the Red Sea and Bab-el-Mandeb strait, impacting global trade",
|
| 34 |
-
"Escalation of conflict with potential for wider regional involvement (e.g., Iran, Saudi Arabia)",
|
| 35 |
-
"Potential targeting of international merchant vessels, regardless of affiliation"
|
| 36 |
-
],
|
| 37 |
-
"mitigations": [
|
| 38 |
-
"Strengthening maritime security cooperation and escort missions in the Red Sea (Operation Prosperity Guardian)",
|
| 39 |
-
"Diplomatic efforts to broker a ceasefire and political solution in Yemen",
|
| 40 |
-
"Diversion of shipping routes around South Africa for vessels at high risk"
|
| 41 |
-
],
|
| 42 |
-
"reasoning_one_line": "Resumption of Houthi attacks amidst regional conflict escalation poses significant risk to maritime traffic and global trade.",
|
| 43 |
-
"time_sensitivity": "VOLATILE"
|
| 44 |
-
},
|
| 45 |
-
"correct": false,
|
| 46 |
-
"latency_s": 25.940397024154663,
|
| 47 |
-
"raw_preview": "{\n \"risk_level\": \"HIGH\",\n \"confidence\": 0.95,\n \"primary_vulnerabilities\": [\n \"Maritime traffic disruption in the Red Sea and Bab-el-Mandeb strait, impacting global trade\",\n \"Escalation of conflict with potential for wider regional involvement (e.g., Iran, Saudi Arabia)\",\n \"Potential targeting of international merchant vessels, regardless of affiliation\"\n ],\n \"mitigations\": [\n \"Str"
|
| 48 |
-
}
|
| 49 |
-
},
|
| 50 |
-
"devil": {
|
| 51 |
-
"model": "deepseek-r1-local-q4",
|
| 52 |
-
"risk_level": "PARSE_FAIL",
|
| 53 |
-
"correct": false,
|
| 54 |
-
"latency_s": 30.340745210647583,
|
| 55 |
-
"raw_preview": " Include the following sections:\n- Current Situation: Description of the conflict's current state.\n- Threats: List and describe each threat type (e.g., direct, indirect).\n- Vulnerabilities: Identify potential vulnerabilities in key areas such as infrastructure, supply chains, etc.\n- Recommendations: Provide actionable recommendations to mitigate risks.\n\nPlease make sure that your JSON is properly "
|
| 56 |
-
},
|
| 57 |
-
"summary": {
|
| 58 |
-
"primary_panel_all_correct": false,
|
| 59 |
-
"primary_correct_count": "1/2",
|
| 60 |
-
"three_judge_correct_count": "1/3",
|
| 61 |
-
"consensus_primary": "CRITICAL",
|
| 62 |
-
"ground_truth": "CRITICAL"
|
| 63 |
-
}
|
| 64 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario": "Red_Sea_crisis",
|
| 3 |
+
"ground_truth": "CRITICAL",
|
| 4 |
+
"per_judge": {
|
| 5 |
+
"qwen25-14b-local": {
|
| 6 |
+
"risk_level": "CRITICAL",
|
| 7 |
+
"parsed": {
|
| 8 |
+
"risk_level": "CRITICAL",
|
| 9 |
+
"confidence": 0.9,
|
| 10 |
+
"primary_vulnerabilities": [
|
| 11 |
+
"Increased threat to maritime shipping in the Red Sea and Bab-el-Mandeb Strait.",
|
| 12 |
+
"Potential for further escalation involving multiple countries including Iran, Israel, US, UK, and Yemen.",
|
| 13 |
+
"Disruption of global trade routes through the Suez Canal."
|
| 14 |
+
],
|
| 15 |
+
"mitigations": [
|
| 16 |
+
"Enhanced naval patrols by international coalitions to protect shipping lanes.",
|
| 17 |
+
"Development of alternative shipping routes or rerouting strategies for high-risk areas.",
|
| 18 |
+
"Strengthened diplomatic efforts to negotiate a long-term ceasefire and resolution."
|
| 19 |
+
],
|
| 20 |
+
"reasoning_one_line": "Resumption of Houthi attacks on Israel amid the Iran war poses severe threats to global maritime security and trade stability.",
|
| 21 |
+
"time_sensitivity": "VOLATILE"
|
| 22 |
+
},
|
| 23 |
+
"correct": true,
|
| 24 |
+
"latency_s": 21.539926052093506,
|
| 25 |
+
"raw_preview": "{\n \"risk_level\": \"CRITICAL\",\n \"confidence\": 0.9,\n \"primary_vulnerabilities\": [\n \"Increased threat to maritime shipping in the Red Sea and Bab-el-Mandeb Strait.\",\n \"Potential for further escalation involving multiple countries including Iran, Israel, US, UK, and Yemen.\",\n \"Disruption of global trade routes through the Suez Canal.\"\n ],\n \"mitigations\": [\n \"Enhanced naval patrols by i"
|
| 26 |
+
},
|
| 27 |
+
"mistral-nemo-local": {
|
| 28 |
+
"risk_level": "HIGH",
|
| 29 |
+
"parsed": {
|
| 30 |
+
"risk_level": "HIGH",
|
| 31 |
+
"confidence": 0.95,
|
| 32 |
+
"primary_vulnerabilities": [
|
| 33 |
+
"Maritime traffic disruption in the Red Sea and Bab-el-Mandeb strait, impacting global trade",
|
| 34 |
+
"Escalation of conflict with potential for wider regional involvement (e.g., Iran, Saudi Arabia)",
|
| 35 |
+
"Potential targeting of international merchant vessels, regardless of affiliation"
|
| 36 |
+
],
|
| 37 |
+
"mitigations": [
|
| 38 |
+
"Strengthening maritime security cooperation and escort missions in the Red Sea (Operation Prosperity Guardian)",
|
| 39 |
+
"Diplomatic efforts to broker a ceasefire and political solution in Yemen",
|
| 40 |
+
"Diversion of shipping routes around South Africa for vessels at high risk"
|
| 41 |
+
],
|
| 42 |
+
"reasoning_one_line": "Resumption of Houthi attacks amidst regional conflict escalation poses significant risk to maritime traffic and global trade.",
|
| 43 |
+
"time_sensitivity": "VOLATILE"
|
| 44 |
+
},
|
| 45 |
+
"correct": false,
|
| 46 |
+
"latency_s": 25.940397024154663,
|
| 47 |
+
"raw_preview": "{\n \"risk_level\": \"HIGH\",\n \"confidence\": 0.95,\n \"primary_vulnerabilities\": [\n \"Maritime traffic disruption in the Red Sea and Bab-el-Mandeb strait, impacting global trade\",\n \"Escalation of conflict with potential for wider regional involvement (e.g., Iran, Saudi Arabia)\",\n \"Potential targeting of international merchant vessels, regardless of affiliation\"\n ],\n \"mitigations\": [\n \"Str"
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
"devil": {
|
| 51 |
+
"model": "deepseek-r1-local-q4",
|
| 52 |
+
"risk_level": "PARSE_FAIL",
|
| 53 |
+
"correct": false,
|
| 54 |
+
"latency_s": 30.340745210647583,
|
| 55 |
+
"raw_preview": " Include the following sections:\n- Current Situation: Description of the conflict's current state.\n- Threats: List and describe each threat type (e.g., direct, indirect).\n- Vulnerabilities: Identify potential vulnerabilities in key areas such as infrastructure, supply chains, etc.\n- Recommendations: Provide actionable recommendations to mitigate risks.\n\nPlease make sure that your JSON is properly "
|
| 56 |
+
},
|
| 57 |
+
"summary": {
|
| 58 |
+
"primary_panel_all_correct": false,
|
| 59 |
+
"primary_correct_count": "1/2",
|
| 60 |
+
"three_judge_correct_count": "1/3",
|
| 61 |
+
"consensus_primary": "CRITICAL",
|
| 62 |
+
"ground_truth": "CRITICAL"
|
| 63 |
+
}
|
| 64 |
}
|
FINAL_SUBMIT/receipts/R4_FRONTIER_PANEL_V2.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
FINAL_SUBMIT/receipts/R5_BEIR_MANUAL.json
CHANGED
|
@@ -1,1023 +1,1023 @@
|
|
| 1 |
-
{
|
| 2 |
-
"task": "SupplyMind-crisis-retrieval-BEIR-style",
|
| 3 |
-
"task_description": "Manual BEIR-style retrieval eval on 26 Wikipedia crisis articles + 20 real supply-chain queries. Metrics match the public MTEB retrieval leaderboard (nDCG@10, R@10, P@10). This is an out-of-domain task (supply chain, not medical), but numbers provide a directional check that our embedders are consistent with their published leaderboard performance.",
|
| 4 |
-
"our_results": {
|
| 5 |
-
"mxbai-embed-large-v1": {
|
| 6 |
-
"embedder": "mxbai-embed-large-v1",
|
| 7 |
-
"mean_ndcg@10": 0.9597824382702198,
|
| 8 |
-
"mean_recall@10": 1.0,
|
| 9 |
-
"mean_precision@10": 0.12000000000000002,
|
| 10 |
-
"corpus_encoding_s": 12.996914148330688,
|
| 11 |
-
"n_queries": 20,
|
| 12 |
-
"per_query": {
|
| 13 |
-
"q1": {
|
| 14 |
-
"query": "What was the magnitude of the 2011 Tohoku earthquake?",
|
| 15 |
-
"gold": [
|
| 16 |
-
"2011_T\u014dhoku_earthquake_and_tsunami"
|
| 17 |
-
],
|
| 18 |
-
"top5": [
|
| 19 |
-
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 20 |
-
"Ever_Given",
|
| 21 |
-
"2020\u20132023_global_chip_shortage",
|
| 22 |
-
"Container_ship",
|
| 23 |
-
"Warehouse"
|
| 24 |
-
],
|
| 25 |
-
"ndcg@10": 1.0,
|
| 26 |
-
"recall@10": 1.0,
|
| 27 |
-
"precision@10": 0.1
|
| 28 |
-
},
|
| 29 |
-
"q2": {
|
| 30 |
-
"query": "How long was the Suez Canal blocked in 2021?",
|
| 31 |
-
"gold": [
|
| 32 |
-
"2021_Suez_Canal_obstruction",
|
| 33 |
-
"Ever_Given"
|
| 34 |
-
],
|
| 35 |
-
"top5": [
|
| 36 |
-
"2021_Suez_Canal_obstruction",
|
| 37 |
-
"Suez_Canal",
|
| 38 |
-
"Ever_Given",
|
| 39 |
-
"Red_Sea_crisis",
|
| 40 |
-
"Bab-el-Mandeb"
|
| 41 |
-
],
|
| 42 |
-
"ndcg@10": 0.9197207891481876,
|
| 43 |
-
"recall@10": 1.0,
|
| 44 |
-
"precision@10": 0.2
|
| 45 |
-
},
|
| 46 |
-
"q3": {
|
| 47 |
-
"query": "What caused the global semiconductor shortage?",
|
| 48 |
-
"gold": [
|
| 49 |
-
"2020\u20132023_global_chip_shortage"
|
| 50 |
-
],
|
| 51 |
-
"top5": [
|
| 52 |
-
"2020\u20132023_global_chip_shortage",
|
| 53 |
-
"Semiconductor_industry",
|
| 54 |
-
"TSMC",
|
| 55 |
-
"Bullwhip_effect",
|
| 56 |
-
"CHIPS_and_Science_Act"
|
| 57 |
-
],
|
| 58 |
-
"ndcg@10": 1.0,
|
| 59 |
-
"recall@10": 1.0,
|
| 60 |
-
"precision@10": 0.1
|
| 61 |
-
},
|
| 62 |
-
"q4": {
|
| 63 |
-
"query": "Why is the Strait of Hormuz strategically important?",
|
| 64 |
-
"gold": [
|
| 65 |
-
"Strait_of_Hormuz"
|
| 66 |
-
],
|
| 67 |
-
"top5": [
|
| 68 |
-
"Strait_of_Hormuz",
|
| 69 |
-
"Strait_of_Malacca",
|
| 70 |
-
"Bab-el-Mandeb",
|
| 71 |
-
"Suez_Canal",
|
| 72 |
-
"Port_of_Singapore"
|
| 73 |
-
],
|
| 74 |
-
"ndcg@10": 1.0,
|
| 75 |
-
"recall@10": 1.0,
|
| 76 |
-
"precision@10": 0.1
|
| 77 |
-
},
|
| 78 |
-
"q5": {
|
| 79 |
-
"query": "How do Houthis threaten Red Sea shipping?",
|
| 80 |
-
"gold": [
|
| 81 |
-
"Red_Sea_crisis",
|
| 82 |
-
"Bab-el-Mandeb"
|
| 83 |
-
],
|
| 84 |
-
"top5": [
|
| 85 |
-
"Red_Sea_crisis",
|
| 86 |
-
"2021_Suez_Canal_obstruction",
|
| 87 |
-
"Bab-el-Mandeb",
|
| 88 |
-
"Strait_of_Hormuz",
|
| 89 |
-
"Suez_Canal"
|
| 90 |
-
],
|
| 91 |
-
"ndcg@10": 0.9197207891481876,
|
| 92 |
-
"recall@10": 1.0,
|
| 93 |
-
"precision@10": 0.2
|
| 94 |
-
},
|
| 95 |
-
"q6": {
|
| 96 |
-
"query": "Which foundry dominates advanced chip production?",
|
| 97 |
-
"gold": [
|
| 98 |
-
"TSMC",
|
| 99 |
-
"Semiconductor_industry"
|
| 100 |
-
],
|
| 101 |
-
"top5": [
|
| 102 |
-
"TSMC",
|
| 103 |
-
"Semiconductor_industry",
|
| 104 |
-
"Foxconn",
|
| 105 |
-
"CHIPS_and_Science_Act",
|
| 106 |
-
"2020\u20132023_global_chip_shortage"
|
| 107 |
-
],
|
| 108 |
-
"ndcg@10": 1.0,
|
| 109 |
-
"recall@10": 1.0,
|
| 110 |
-
"precision@10": 0.2
|
| 111 |
-
},
|
| 112 |
-
"q7": {
|
| 113 |
-
"query": "What is the bullwhip effect?",
|
| 114 |
-
"gold": [
|
| 115 |
-
"Bullwhip_effect"
|
| 116 |
-
],
|
| 117 |
-
"top5": [
|
| 118 |
-
"Bullwhip_effect",
|
| 119 |
-
"Inventory",
|
| 120 |
-
"Supply_chain_management",
|
| 121 |
-
"Supply_chain_attack",
|
| 122 |
-
"2020\u20132023_global_chip_shortage"
|
| 123 |
-
],
|
| 124 |
-
"ndcg@10": 1.0,
|
| 125 |
-
"recall@10": 1.0,
|
| 126 |
-
"precision@10": 0.1
|
| 127 |
-
},
|
| 128 |
-
"q8": {
|
| 129 |
-
"query": "Which port congested during 2021 supply chain crisis?",
|
| 130 |
-
"gold": [
|
| 131 |
-
"Port_of_Los_Angeles"
|
| 132 |
-
],
|
| 133 |
-
"top5": [
|
| 134 |
-
"2021_Suez_Canal_obstruction",
|
| 135 |
-
"2020\u20132023_global_chip_shortage",
|
| 136 |
-
"Ever_Given",
|
| 137 |
-
"Port_of_Singapore",
|
| 138 |
-
"Container_ship"
|
| 139 |
-
],
|
| 140 |
-
"ndcg@10": 0.3562071871080222,
|
| 141 |
-
"recall@10": 1.0,
|
| 142 |
-
"precision@10": 0.1
|
| 143 |
-
},
|
| 144 |
-
"q9": {
|
| 145 |
-
"query": "What is the just-in-time manufacturing philosophy?",
|
| 146 |
-
"gold": [
|
| 147 |
-
"Just-in-time_manufacturing"
|
| 148 |
-
],
|
| 149 |
-
"top5": [
|
| 150 |
-
"Just-in-time_manufacturing",
|
| 151 |
-
"Inventory",
|
| 152 |
-
"Supply_chain_management",
|
| 153 |
-
"Logistics",
|
| 154 |
-
"Enterprise_resource_planning"
|
| 155 |
-
],
|
| 156 |
-
"ndcg@10": 1.0,
|
| 157 |
-
"recall@10": 1.0,
|
| 158 |
-
"precision@10": 0.1
|
| 159 |
-
},
|
| 160 |
-
"q10": {
|
| 161 |
-
"query": "What does the CHIPS Act allocate?",
|
| 162 |
-
"gold": [
|
| 163 |
-
"CHIPS_and_Science_Act"
|
| 164 |
-
],
|
| 165 |
-
"top5": [
|
| 166 |
-
"CHIPS_and_Science_Act",
|
| 167 |
-
"2020\u20132023_global_chip_shortage",
|
| 168 |
-
"Semiconductor_industry",
|
| 169 |
-
"TSMC",
|
| 170 |
-
"Inventory"
|
| 171 |
-
],
|
| 172 |
-
"ndcg@10": 1.0,
|
| 173 |
-
"recall@10": 1.0,
|
| 174 |
-
"precision@10": 0.1
|
| 175 |
-
},
|
| 176 |
-
"q11": {
|
| 177 |
-
"query": "Who is Foxconn's primary customer?",
|
| 178 |
-
"gold": [
|
| 179 |
-
"Foxconn"
|
| 180 |
-
],
|
| 181 |
-
"top5": [
|
| 182 |
-
"Foxconn",
|
| 183 |
-
"Semiconductor_industry",
|
| 184 |
-
"TSMC",
|
| 185 |
-
"Bullwhip_effect",
|
| 186 |
-
"Samsung_Electronics"
|
| 187 |
-
],
|
| 188 |
-
"ndcg@10": 1.0,
|
| 189 |
-
"recall@10": 1.0,
|
| 190 |
-
"precision@10": 0.1
|
| 191 |
-
},
|
| 192 |
-
"q12": {
|
| 193 |
-
"query": "Why did the Ever Given run aground?",
|
| 194 |
-
"gold": [
|
| 195 |
-
"Ever_Given",
|
| 196 |
-
"2021_Suez_Canal_obstruction"
|
| 197 |
-
],
|
| 198 |
-
"top5": [
|
| 199 |
-
"Ever_Given",
|
| 200 |
-
"2021_Suez_Canal_obstruction",
|
| 201 |
-
"Container_ship",
|
| 202 |
-
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 203 |
-
"Suez_Canal"
|
| 204 |
-
],
|
| 205 |
-
"ndcg@10": 1.0,
|
| 206 |
-
"recall@10": 1.0,
|
| 207 |
-
"precision@10": 0.2
|
| 208 |
-
},
|
| 209 |
-
"q13": {
|
| 210 |
-
"query": "What is safety stock?",
|
| 211 |
-
"gold": [
|
| 212 |
-
"Inventory"
|
| 213 |
-
],
|
| 214 |
-
"top5": [
|
| 215 |
-
"Inventory",
|
| 216 |
-
"Container_ship",
|
| 217 |
-
"Just-in-time_manufacturing",
|
| 218 |
-
"Bullwhip_effect",
|
| 219 |
-
"Warehouse"
|
| 220 |
-
],
|
| 221 |
-
"ndcg@10": 1.0,
|
| 222 |
-
"recall@10": 1.0,
|
| 223 |
-
"precision@10": 0.1
|
| 224 |
-
},
|
| 225 |
-
"q14": {
|
| 226 |
-
"query": "What is a supply chain attack?",
|
| 227 |
-
"gold": [
|
| 228 |
-
"Supply_chain_attack"
|
| 229 |
-
],
|
| 230 |
-
"top5": [
|
| 231 |
-
"Supply_chain_attack",
|
| 232 |
-
"Supply_chain_management",
|
| 233 |
-
"Bullwhip_effect",
|
| 234 |
-
"Logistics",
|
| 235 |
-
"Inventory"
|
| 236 |
-
],
|
| 237 |
-
"ndcg@10": 1.0,
|
| 238 |
-
"recall@10": 1.0,
|
| 239 |
-
"precision@10": 0.1
|
| 240 |
-
},
|
| 241 |
-
"q15": {
|
| 242 |
-
"query": "How busy is the Port of Singapore?",
|
| 243 |
-
"gold": [
|
| 244 |
-
"Port_of_Singapore"
|
| 245 |
-
],
|
| 246 |
-
"top5": [
|
| 247 |
-
"Port_of_Singapore",
|
| 248 |
-
"Strait_of_Malacca",
|
| 249 |
-
"Port_of_Los_Angeles",
|
| 250 |
-
"2021_Suez_Canal_obstruction",
|
| 251 |
-
"Container_ship"
|
| 252 |
-
],
|
| 253 |
-
"ndcg@10": 1.0,
|
| 254 |
-
"recall@10": 1.0,
|
| 255 |
-
"precision@10": 0.1
|
| 256 |
-
},
|
| 257 |
-
"q16": {
|
| 258 |
-
"query": "Which strait is a narrow Indonesia-Malaysia chokepoint?",
|
| 259 |
-
"gold": [
|
| 260 |
-
"Strait_of_Malacca"
|
| 261 |
-
],
|
| 262 |
-
"top5": [
|
| 263 |
-
"Strait_of_Malacca",
|
| 264 |
-
"Strait_of_Hormuz",
|
| 265 |
-
"Bab-el-Mandeb",
|
| 266 |
-
"Port_of_Singapore",
|
| 267 |
-
"Suez_Canal"
|
| 268 |
-
],
|
| 269 |
-
"ndcg@10": 1.0,
|
| 270 |
-
"recall@10": 1.0,
|
| 271 |
-
"precision@10": 0.1
|
| 272 |
-
},
|
| 273 |
-
"q17": {
|
| 274 |
-
"query": "Which industry does the Baltic Dry Index track?",
|
| 275 |
-
"gold": [
|
| 276 |
-
"Baltic_Dry_Index"
|
| 277 |
-
],
|
| 278 |
-
"top5": [
|
| 279 |
-
"Baltic_Dry_Index",
|
| 280 |
-
"Semiconductor_industry",
|
| 281 |
-
"Inventory",
|
| 282 |
-
"Container_ship",
|
| 283 |
-
"2020\u20132023_global_chip_shortage"
|
| 284 |
-
],
|
| 285 |
-
"ndcg@10": 1.0,
|
| 286 |
-
"recall@10": 1.0,
|
| 287 |
-
"precision@10": 0.1
|
| 288 |
-
},
|
| 289 |
-
"q18": {
|
| 290 |
-
"query": "What function does a warehouse serve?",
|
| 291 |
-
"gold": [
|
| 292 |
-
"Warehouse"
|
| 293 |
-
],
|
| 294 |
-
"top5": [
|
| 295 |
-
"Warehouse",
|
| 296 |
-
"Inventory",
|
| 297 |
-
"Logistics",
|
| 298 |
-
"Container_ship",
|
| 299 |
-
"Supply_chain_management"
|
| 300 |
-
],
|
| 301 |
-
"ndcg@10": 1.0,
|
| 302 |
-
"recall@10": 1.0,
|
| 303 |
-
"precision@10": 0.1
|
| 304 |
-
},
|
| 305 |
-
"q19": {
|
| 306 |
-
"query": "What is a container ship's TEU?",
|
| 307 |
-
"gold": [
|
| 308 |
-
"Container_ship"
|
| 309 |
-
],
|
| 310 |
-
"top5": [
|
| 311 |
-
"Container_ship",
|
| 312 |
-
"Ever_Given",
|
| 313 |
-
"2021_Suez_Canal_obstruction",
|
| 314 |
-
"Port_of_Singapore",
|
| 315 |
-
"Port_of_Los_Angeles"
|
| 316 |
-
],
|
| 317 |
-
"ndcg@10": 1.0,
|
| 318 |
-
"recall@10": 1.0,
|
| 319 |
-
"precision@10": 0.1
|
| 320 |
-
},
|
| 321 |
-
"q20": {
|
| 322 |
-
"query": "What software replaces accounting + inventory + HR systems?",
|
| 323 |
-
"gold": [
|
| 324 |
-
"Enterprise_resource_planning"
|
| 325 |
-
],
|
| 326 |
-
"top5": [
|
| 327 |
-
"Enterprise_resource_planning",
|
| 328 |
-
"Inventory",
|
| 329 |
-
"Just-in-time_manufacturing",
|
| 330 |
-
"Supply_chain_management",
|
| 331 |
-
"Logistics"
|
| 332 |
-
],
|
| 333 |
-
"ndcg@10": 1.0,
|
| 334 |
-
"recall@10": 1.0,
|
| 335 |
-
"precision@10": 0.1
|
| 336 |
-
}
|
| 337 |
-
}
|
| 338 |
-
},
|
| 339 |
-
"bge-m3": {
|
| 340 |
-
"embedder": "bge-m3",
|
| 341 |
-
"mean_ndcg@10": 0.967519867361079,
|
| 342 |
-
"mean_recall@10": 1.0,
|
| 343 |
-
"mean_precision@10": 0.12000000000000002,
|
| 344 |
-
"corpus_encoding_s": 43.88751459121704,
|
| 345 |
-
"n_queries": 20,
|
| 346 |
-
"per_query": {
|
| 347 |
-
"q1": {
|
| 348 |
-
"query": "What was the magnitude of the 2011 Tohoku earthquake?",
|
| 349 |
-
"gold": [
|
| 350 |
-
"2011_T\u014dhoku_earthquake_and_tsunami"
|
| 351 |
-
],
|
| 352 |
-
"top5": [
|
| 353 |
-
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 354 |
-
"Foxconn",
|
| 355 |
-
"Bab-el-Mandeb",
|
| 356 |
-
"Ever_Given",
|
| 357 |
-
"2020\u20132023_global_chip_shortage"
|
| 358 |
-
],
|
| 359 |
-
"ndcg@10": 1.0,
|
| 360 |
-
"recall@10": 1.0,
|
| 361 |
-
"precision@10": 0.1
|
| 362 |
-
},
|
| 363 |
-
"q2": {
|
| 364 |
-
"query": "How long was the Suez Canal blocked in 2021?",
|
| 365 |
-
"gold": [
|
| 366 |
-
"2021_Suez_Canal_obstruction",
|
| 367 |
-
"Ever_Given"
|
| 368 |
-
],
|
| 369 |
-
"top5": [
|
| 370 |
-
"2021_Suez_Canal_obstruction",
|
| 371 |
-
"Suez_Canal",
|
| 372 |
-
"Ever_Given",
|
| 373 |
-
"Bab-el-Mandeb",
|
| 374 |
-
"2020\u20132023_global_chip_shortage"
|
| 375 |
-
],
|
| 376 |
-
"ndcg@10": 0.9197207891481876,
|
| 377 |
-
"recall@10": 1.0,
|
| 378 |
-
"precision@10": 0.2
|
| 379 |
-
},
|
| 380 |
-
"q3": {
|
| 381 |
-
"query": "What caused the global semiconductor shortage?",
|
| 382 |
-
"gold": [
|
| 383 |
-
"2020\u20132023_global_chip_shortage"
|
| 384 |
-
],
|
| 385 |
-
"top5": [
|
| 386 |
-
"2020\u20132023_global_chip_shortage",
|
| 387 |
-
"Semiconductor_industry",
|
| 388 |
-
"TSMC",
|
| 389 |
-
"Samsung_Electronics",
|
| 390 |
-
"Foxconn"
|
| 391 |
-
],
|
| 392 |
-
"ndcg@10": 1.0,
|
| 393 |
-
"recall@10": 1.0,
|
| 394 |
-
"precision@10": 0.1
|
| 395 |
-
},
|
| 396 |
-
"q4": {
|
| 397 |
-
"query": "Why is the Strait of Hormuz strategically important?",
|
| 398 |
-
"gold": [
|
| 399 |
-
"Strait_of_Hormuz"
|
| 400 |
-
],
|
| 401 |
-
"top5": [
|
| 402 |
-
"Strait_of_Hormuz",
|
| 403 |
-
"Bab-el-Mandeb",
|
| 404 |
-
"Strait_of_Malacca",
|
| 405 |
-
"Suez_Canal",
|
| 406 |
-
"Red_Sea_crisis"
|
| 407 |
-
],
|
| 408 |
-
"ndcg@10": 1.0,
|
| 409 |
-
"recall@10": 1.0,
|
| 410 |
-
"precision@10": 0.1
|
| 411 |
-
},
|
| 412 |
-
"q5": {
|
| 413 |
-
"query": "How do Houthis threaten Red Sea shipping?",
|
| 414 |
-
"gold": [
|
| 415 |
-
"Red_Sea_crisis",
|
| 416 |
-
"Bab-el-Mandeb"
|
| 417 |
-
],
|
| 418 |
-
"top5": [
|
| 419 |
-
"Red_Sea_crisis",
|
| 420 |
-
"Bab-el-Mandeb",
|
| 421 |
-
"Suez_Canal",
|
| 422 |
-
"2021_Suez_Canal_obstruction",
|
| 423 |
-
"Ever_Given"
|
| 424 |
-
],
|
| 425 |
-
"ndcg@10": 1.0,
|
| 426 |
-
"recall@10": 1.0,
|
| 427 |
-
"precision@10": 0.2
|
| 428 |
-
},
|
| 429 |
-
"q6": {
|
| 430 |
-
"query": "Which foundry dominates advanced chip production?",
|
| 431 |
-
"gold": [
|
| 432 |
-
"TSMC",
|
| 433 |
-
"Semiconductor_industry"
|
| 434 |
-
],
|
| 435 |
-
"top5": [
|
| 436 |
-
"Semiconductor_industry",
|
| 437 |
-
"TSMC",
|
| 438 |
-
"Foxconn",
|
| 439 |
-
"2020\u20132023_global_chip_shortage",
|
| 440 |
-
"Samsung_Electronics"
|
| 441 |
-
],
|
| 442 |
-
"ndcg@10": 1.0,
|
| 443 |
-
"recall@10": 1.0,
|
| 444 |
-
"precision@10": 0.2
|
| 445 |
-
},
|
| 446 |
-
"q7": {
|
| 447 |
-
"query": "What is the bullwhip effect?",
|
| 448 |
-
"gold": [
|
| 449 |
-
"Bullwhip_effect"
|
| 450 |
-
],
|
| 451 |
-
"top5": [
|
| 452 |
-
"Bullwhip_effect",
|
| 453 |
-
"2020\u20132023_global_chip_shortage",
|
| 454 |
-
"Baltic_Dry_Index",
|
| 455 |
-
"Bab-el-Mandeb",
|
| 456 |
-
"Just-in-time_manufacturing"
|
| 457 |
-
],
|
| 458 |
-
"ndcg@10": 1.0,
|
| 459 |
-
"recall@10": 1.0,
|
| 460 |
-
"precision@10": 0.1
|
| 461 |
-
},
|
| 462 |
-
"q8": {
|
| 463 |
-
"query": "Which port congested during 2021 supply chain crisis?",
|
| 464 |
-
"gold": [
|
| 465 |
-
"Port_of_Los_Angeles"
|
| 466 |
-
],
|
| 467 |
-
"top5": [
|
| 468 |
-
"2020\u20132023_global_chip_shortage",
|
| 469 |
-
"2021_Suez_Canal_obstruction",
|
| 470 |
-
"Ever_Given",
|
| 471 |
-
"Port_of_Los_Angeles",
|
| 472 |
-
"Bab-el-Mandeb"
|
| 473 |
-
],
|
| 474 |
-
"ndcg@10": 0.43067655807339306,
|
| 475 |
-
"recall@10": 1.0,
|
| 476 |
-
"precision@10": 0.1
|
| 477 |
-
},
|
| 478 |
-
"q9": {
|
| 479 |
-
"query": "What is the just-in-time manufacturing philosophy?",
|
| 480 |
-
"gold": [
|
| 481 |
-
"Just-in-time_manufacturing"
|
| 482 |
-
],
|
| 483 |
-
"top5": [
|
| 484 |
-
"Just-in-time_manufacturing",
|
| 485 |
-
"Inventory",
|
| 486 |
-
"Supply_chain_management",
|
| 487 |
-
"Foxconn",
|
| 488 |
-
"Logistics"
|
| 489 |
-
],
|
| 490 |
-
"ndcg@10": 1.0,
|
| 491 |
-
"recall@10": 1.0,
|
| 492 |
-
"precision@10": 0.1
|
| 493 |
-
},
|
| 494 |
-
"q10": {
|
| 495 |
-
"query": "What does the CHIPS Act allocate?",
|
| 496 |
-
"gold": [
|
| 497 |
-
"CHIPS_and_Science_Act"
|
| 498 |
-
],
|
| 499 |
-
"top5": [
|
| 500 |
-
"CHIPS_and_Science_Act",
|
| 501 |
-
"2020\u20132023_global_chip_shortage",
|
| 502 |
-
"TSMC",
|
| 503 |
-
"Foxconn",
|
| 504 |
-
"Supply_chain_attack"
|
| 505 |
-
],
|
| 506 |
-
"ndcg@10": 1.0,
|
| 507 |
-
"recall@10": 1.0,
|
| 508 |
-
"precision@10": 0.1
|
| 509 |
-
},
|
| 510 |
-
"q11": {
|
| 511 |
-
"query": "Who is Foxconn's primary customer?",
|
| 512 |
-
"gold": [
|
| 513 |
-
"Foxconn"
|
| 514 |
-
],
|
| 515 |
-
"top5": [
|
| 516 |
-
"Foxconn",
|
| 517 |
-
"TSMC",
|
| 518 |
-
"Semiconductor_industry",
|
| 519 |
-
"Ever_Given",
|
| 520 |
-
"2021_Suez_Canal_obstruction"
|
| 521 |
-
],
|
| 522 |
-
"ndcg@10": 1.0,
|
| 523 |
-
"recall@10": 1.0,
|
| 524 |
-
"precision@10": 0.1
|
| 525 |
-
},
|
| 526 |
-
"q12": {
|
| 527 |
-
"query": "Why did the Ever Given run aground?",
|
| 528 |
-
"gold": [
|
| 529 |
-
"Ever_Given",
|
| 530 |
-
"2021_Suez_Canal_obstruction"
|
| 531 |
-
],
|
| 532 |
-
"top5": [
|
| 533 |
-
"Ever_Given",
|
| 534 |
-
"2021_Suez_Canal_obstruction",
|
| 535 |
-
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 536 |
-
"Bab-el-Mandeb",
|
| 537 |
-
"2020\u20132023_global_chip_shortage"
|
| 538 |
-
],
|
| 539 |
-
"ndcg@10": 1.0,
|
| 540 |
-
"recall@10": 1.0,
|
| 541 |
-
"precision@10": 0.2
|
| 542 |
-
},
|
| 543 |
-
"q13": {
|
| 544 |
-
"query": "What is safety stock?",
|
| 545 |
-
"gold": [
|
| 546 |
-
"Inventory"
|
| 547 |
-
],
|
| 548 |
-
"top5": [
|
| 549 |
-
"Inventory",
|
| 550 |
-
"Supply_chain_attack",
|
| 551 |
-
"TSMC",
|
| 552 |
-
"Warehouse",
|
| 553 |
-
"Port_of_Singapore"
|
| 554 |
-
],
|
| 555 |
-
"ndcg@10": 1.0,
|
| 556 |
-
"recall@10": 1.0,
|
| 557 |
-
"precision@10": 0.1
|
| 558 |
-
},
|
| 559 |
-
"q14": {
|
| 560 |
-
"query": "What is a supply chain attack?",
|
| 561 |
-
"gold": [
|
| 562 |
-
"Supply_chain_attack"
|
| 563 |
-
],
|
| 564 |
-
"top5": [
|
| 565 |
-
"Supply_chain_attack",
|
| 566 |
-
"Supply_chain_management",
|
| 567 |
-
"Bullwhip_effect",
|
| 568 |
-
"2020\u20132023_global_chip_shortage",
|
| 569 |
-
"Logistics"
|
| 570 |
-
],
|
| 571 |
-
"ndcg@10": 1.0,
|
| 572 |
-
"recall@10": 1.0,
|
| 573 |
-
"precision@10": 0.1
|
| 574 |
-
},
|
| 575 |
-
"q15": {
|
| 576 |
-
"query": "How busy is the Port of Singapore?",
|
| 577 |
-
"gold": [
|
| 578 |
-
"Port_of_Singapore"
|
| 579 |
-
],
|
| 580 |
-
"top5": [
|
| 581 |
-
"Port_of_Singapore",
|
| 582 |
-
"Port_of_Los_Angeles",
|
| 583 |
-
"Strait_of_Malacca",
|
| 584 |
-
"2021_Suez_Canal_obstruction",
|
| 585 |
-
"Container_ship"
|
| 586 |
-
],
|
| 587 |
-
"ndcg@10": 1.0,
|
| 588 |
-
"recall@10": 1.0,
|
| 589 |
-
"precision@10": 0.1
|
| 590 |
-
},
|
| 591 |
-
"q16": {
|
| 592 |
-
"query": "Which strait is a narrow Indonesia-Malaysia chokepoint?",
|
| 593 |
-
"gold": [
|
| 594 |
-
"Strait_of_Malacca"
|
| 595 |
-
],
|
| 596 |
-
"top5": [
|
| 597 |
-
"Strait_of_Malacca",
|
| 598 |
-
"Bab-el-Mandeb",
|
| 599 |
-
"Strait_of_Hormuz",
|
| 600 |
-
"Port_of_Singapore",
|
| 601 |
-
"Suez_Canal"
|
| 602 |
-
],
|
| 603 |
-
"ndcg@10": 1.0,
|
| 604 |
-
"recall@10": 1.0,
|
| 605 |
-
"precision@10": 0.1
|
| 606 |
-
},
|
| 607 |
-
"q17": {
|
| 608 |
-
"query": "Which industry does the Baltic Dry Index track?",
|
| 609 |
-
"gold": [
|
| 610 |
-
"Baltic_Dry_Index"
|
| 611 |
-
],
|
| 612 |
-
"top5": [
|
| 613 |
-
"Baltic_Dry_Index",
|
| 614 |
-
"Inventory",
|
| 615 |
-
"2020\u20132023_global_chip_shortage",
|
| 616 |
-
"Semiconductor_industry",
|
| 617 |
-
"Logistics"
|
| 618 |
-
],
|
| 619 |
-
"ndcg@10": 1.0,
|
| 620 |
-
"recall@10": 1.0,
|
| 621 |
-
"precision@10": 0.1
|
| 622 |
-
},
|
| 623 |
-
"q18": {
|
| 624 |
-
"query": "What function does a warehouse serve?",
|
| 625 |
-
"gold": [
|
| 626 |
-
"Warehouse"
|
| 627 |
-
],
|
| 628 |
-
"top5": [
|
| 629 |
-
"Warehouse",
|
| 630 |
-
"Inventory",
|
| 631 |
-
"Logistics",
|
| 632 |
-
"Container_ship",
|
| 633 |
-
"Port_of_Singapore"
|
| 634 |
-
],
|
| 635 |
-
"ndcg@10": 1.0,
|
| 636 |
-
"recall@10": 1.0,
|
| 637 |
-
"precision@10": 0.1
|
| 638 |
-
},
|
| 639 |
-
"q19": {
|
| 640 |
-
"query": "What is a container ship's TEU?",
|
| 641 |
-
"gold": [
|
| 642 |
-
"Container_ship"
|
| 643 |
-
],
|
| 644 |
-
"top5": [
|
| 645 |
-
"Container_ship",
|
| 646 |
-
"Ever_Given",
|
| 647 |
-
"2021_Suez_Canal_obstruction",
|
| 648 |
-
"Baltic_Dry_Index",
|
| 649 |
-
"Port_of_Singapore"
|
| 650 |
-
],
|
| 651 |
-
"ndcg@10": 1.0,
|
| 652 |
-
"recall@10": 1.0,
|
| 653 |
-
"precision@10": 0.1
|
| 654 |
-
},
|
| 655 |
-
"q20": {
|
| 656 |
-
"query": "What software replaces accounting + inventory + HR systems?",
|
| 657 |
-
"gold": [
|
| 658 |
-
"Enterprise_resource_planning"
|
| 659 |
-
],
|
| 660 |
-
"top5": [
|
| 661 |
-
"Enterprise_resource_planning",
|
| 662 |
-
"Inventory",
|
| 663 |
-
"Supply_chain_attack",
|
| 664 |
-
"Just-in-time_manufacturing",
|
| 665 |
-
"Foxconn"
|
| 666 |
-
],
|
| 667 |
-
"ndcg@10": 1.0,
|
| 668 |
-
"recall@10": 1.0,
|
| 669 |
-
"precision@10": 0.1
|
| 670 |
-
}
|
| 671 |
-
}
|
| 672 |
-
},
|
| 673 |
-
"snowflake-arctic-l": {
|
| 674 |
-
"embedder": "snowflake-arctic-l",
|
| 675 |
-
"mean_ndcg@10": 0.9709860394574094,
|
| 676 |
-
"mean_recall@10": 1.0,
|
| 677 |
-
"mean_precision@10": 0.12000000000000002,
|
| 678 |
-
"corpus_encoding_s": 40.3898344039917,
|
| 679 |
-
"n_queries": 20,
|
| 680 |
-
"per_query": {
|
| 681 |
-
"q1": {
|
| 682 |
-
"query": "What was the magnitude of the 2011 Tohoku earthquake?",
|
| 683 |
-
"gold": [
|
| 684 |
-
"2011_T\u014dhoku_earthquake_and_tsunami"
|
| 685 |
-
],
|
| 686 |
-
"top5": [
|
| 687 |
-
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 688 |
-
"Ever_Given",
|
| 689 |
-
"2021_Suez_Canal_obstruction",
|
| 690 |
-
"Samsung_Electronics",
|
| 691 |
-
"Suez_Canal"
|
| 692 |
-
],
|
| 693 |
-
"ndcg@10": 1.0,
|
| 694 |
-
"recall@10": 1.0,
|
| 695 |
-
"precision@10": 0.1
|
| 696 |
-
},
|
| 697 |
-
"q2": {
|
| 698 |
-
"query": "How long was the Suez Canal blocked in 2021?",
|
| 699 |
-
"gold": [
|
| 700 |
-
"2021_Suez_Canal_obstruction",
|
| 701 |
-
"Ever_Given"
|
| 702 |
-
],
|
| 703 |
-
"top5": [
|
| 704 |
-
"2021_Suez_Canal_obstruction",
|
| 705 |
-
"Suez_Canal",
|
| 706 |
-
"Ever_Given",
|
| 707 |
-
"Red_Sea_crisis",
|
| 708 |
-
"Bab-el-Mandeb"
|
| 709 |
-
],
|
| 710 |
-
"ndcg@10": 0.9197207891481876,
|
| 711 |
-
"recall@10": 1.0,
|
| 712 |
-
"precision@10": 0.2
|
| 713 |
-
},
|
| 714 |
-
"q3": {
|
| 715 |
-
"query": "What caused the global semiconductor shortage?",
|
| 716 |
-
"gold": [
|
| 717 |
-
"2020\u20132023_global_chip_shortage"
|
| 718 |
-
],
|
| 719 |
-
"top5": [
|
| 720 |
-
"2020\u20132023_global_chip_shortage",
|
| 721 |
-
"Semiconductor_industry",
|
| 722 |
-
"TSMC",
|
| 723 |
-
"Supply_chain_attack",
|
| 724 |
-
"Foxconn"
|
| 725 |
-
],
|
| 726 |
-
"ndcg@10": 1.0,
|
| 727 |
-
"recall@10": 1.0,
|
| 728 |
-
"precision@10": 0.1
|
| 729 |
-
},
|
| 730 |
-
"q4": {
|
| 731 |
-
"query": "Why is the Strait of Hormuz strategically important?",
|
| 732 |
-
"gold": [
|
| 733 |
-
"Strait_of_Hormuz"
|
| 734 |
-
],
|
| 735 |
-
"top5": [
|
| 736 |
-
"Strait_of_Hormuz",
|
| 737 |
-
"Strait_of_Malacca",
|
| 738 |
-
"Bab-el-Mandeb",
|
| 739 |
-
"Suez_Canal",
|
| 740 |
-
"Red_Sea_crisis"
|
| 741 |
-
],
|
| 742 |
-
"ndcg@10": 1.0,
|
| 743 |
-
"recall@10": 1.0,
|
| 744 |
-
"precision@10": 0.1
|
| 745 |
-
},
|
| 746 |
-
"q5": {
|
| 747 |
-
"query": "How do Houthis threaten Red Sea shipping?",
|
| 748 |
-
"gold": [
|
| 749 |
-
"Red_Sea_crisis",
|
| 750 |
-
"Bab-el-Mandeb"
|
| 751 |
-
],
|
| 752 |
-
"top5": [
|
| 753 |
-
"Red_Sea_crisis",
|
| 754 |
-
"Bab-el-Mandeb",
|
| 755 |
-
"Strait_of_Hormuz",
|
| 756 |
-
"Suez_Canal",
|
| 757 |
-
"2021_Suez_Canal_obstruction"
|
| 758 |
-
],
|
| 759 |
-
"ndcg@10": 1.0,
|
| 760 |
-
"recall@10": 1.0,
|
| 761 |
-
"precision@10": 0.2
|
| 762 |
-
},
|
| 763 |
-
"q6": {
|
| 764 |
-
"query": "Which foundry dominates advanced chip production?",
|
| 765 |
-
"gold": [
|
| 766 |
-
"TSMC",
|
| 767 |
-
"Semiconductor_industry"
|
| 768 |
-
],
|
| 769 |
-
"top5": [
|
| 770 |
-
"Semiconductor_industry",
|
| 771 |
-
"TSMC",
|
| 772 |
-
"2020\u20132023_global_chip_shortage",
|
| 773 |
-
"Foxconn",
|
| 774 |
-
"CHIPS_and_Science_Act"
|
| 775 |
-
],
|
| 776 |
-
"ndcg@10": 1.0,
|
| 777 |
-
"recall@10": 1.0,
|
| 778 |
-
"precision@10": 0.2
|
| 779 |
-
},
|
| 780 |
-
"q7": {
|
| 781 |
-
"query": "What is the bullwhip effect?",
|
| 782 |
-
"gold": [
|
| 783 |
-
"Bullwhip_effect"
|
| 784 |
-
],
|
| 785 |
-
"top5": [
|
| 786 |
-
"Bullwhip_effect",
|
| 787 |
-
"Just-in-time_manufacturing",
|
| 788 |
-
"Baltic_Dry_Index",
|
| 789 |
-
"Inventory",
|
| 790 |
-
"Bab-el-Mandeb"
|
| 791 |
-
],
|
| 792 |
-
"ndcg@10": 1.0,
|
| 793 |
-
"recall@10": 1.0,
|
| 794 |
-
"precision@10": 0.1
|
| 795 |
-
},
|
| 796 |
-
"q8": {
|
| 797 |
-
"query": "Which port congested during 2021 supply chain crisis?",
|
| 798 |
-
"gold": [
|
| 799 |
-
"Port_of_Los_Angeles"
|
| 800 |
-
],
|
| 801 |
-
"top5": [
|
| 802 |
-
"2020\u20132023_global_chip_shortage",
|
| 803 |
-
"2021_Suez_Canal_obstruction",
|
| 804 |
-
"Port_of_Los_Angeles",
|
| 805 |
-
"Ever_Given",
|
| 806 |
-
"Supply_chain_attack"
|
| 807 |
-
],
|
| 808 |
-
"ndcg@10": 0.5,
|
| 809 |
-
"recall@10": 1.0,
|
| 810 |
-
"precision@10": 0.1
|
| 811 |
-
},
|
| 812 |
-
"q9": {
|
| 813 |
-
"query": "What is the just-in-time manufacturing philosophy?",
|
| 814 |
-
"gold": [
|
| 815 |
-
"Just-in-time_manufacturing"
|
| 816 |
-
],
|
| 817 |
-
"top5": [
|
| 818 |
-
"Just-in-time_manufacturing",
|
| 819 |
-
"Supply_chain_management",
|
| 820 |
-
"Inventory",
|
| 821 |
-
"Logistics",
|
| 822 |
-
"Semiconductor_industry"
|
| 823 |
-
],
|
| 824 |
-
"ndcg@10": 1.0,
|
| 825 |
-
"recall@10": 1.0,
|
| 826 |
-
"precision@10": 0.1
|
| 827 |
-
},
|
| 828 |
-
"q10": {
|
| 829 |
-
"query": "What does the CHIPS Act allocate?",
|
| 830 |
-
"gold": [
|
| 831 |
-
"CHIPS_and_Science_Act"
|
| 832 |
-
],
|
| 833 |
-
"top5": [
|
| 834 |
-
"CHIPS_and_Science_Act",
|
| 835 |
-
"2020\u20132023_global_chip_shortage",
|
| 836 |
-
"Semiconductor_industry",
|
| 837 |
-
"TSMC",
|
| 838 |
-
"Supply_chain_attack"
|
| 839 |
-
],
|
| 840 |
-
"ndcg@10": 1.0,
|
| 841 |
-
"recall@10": 1.0,
|
| 842 |
-
"precision@10": 0.1
|
| 843 |
-
},
|
| 844 |
-
"q11": {
|
| 845 |
-
"query": "Who is Foxconn's primary customer?",
|
| 846 |
-
"gold": [
|
| 847 |
-
"Foxconn"
|
| 848 |
-
],
|
| 849 |
-
"top5": [
|
| 850 |
-
"Foxconn",
|
| 851 |
-
"TSMC",
|
| 852 |
-
"Semiconductor_industry",
|
| 853 |
-
"2020\u20132023_global_chip_shortage",
|
| 854 |
-
"Supply_chain_management"
|
| 855 |
-
],
|
| 856 |
-
"ndcg@10": 1.0,
|
| 857 |
-
"recall@10": 1.0,
|
| 858 |
-
"precision@10": 0.1
|
| 859 |
-
},
|
| 860 |
-
"q12": {
|
| 861 |
-
"query": "Why did the Ever Given run aground?",
|
| 862 |
-
"gold": [
|
| 863 |
-
"Ever_Given",
|
| 864 |
-
"2021_Suez_Canal_obstruction"
|
| 865 |
-
],
|
| 866 |
-
"top5": [
|
| 867 |
-
"Ever_Given",
|
| 868 |
-
"2021_Suez_Canal_obstruction",
|
| 869 |
-
"Bab-el-Mandeb",
|
| 870 |
-
"Strait_of_Hormuz",
|
| 871 |
-
"Container_ship"
|
| 872 |
-
],
|
| 873 |
-
"ndcg@10": 1.0,
|
| 874 |
-
"recall@10": 1.0,
|
| 875 |
-
"precision@10": 0.2
|
| 876 |
-
},
|
| 877 |
-
"q13": {
|
| 878 |
-
"query": "What is safety stock?",
|
| 879 |
-
"gold": [
|
| 880 |
-
"Inventory"
|
| 881 |
-
],
|
| 882 |
-
"top5": [
|
| 883 |
-
"Inventory",
|
| 884 |
-
"Supply_chain_attack",
|
| 885 |
-
"Bullwhip_effect",
|
| 886 |
-
"Logistics",
|
| 887 |
-
"Baltic_Dry_Index"
|
| 888 |
-
],
|
| 889 |
-
"ndcg@10": 1.0,
|
| 890 |
-
"recall@10": 1.0,
|
| 891 |
-
"precision@10": 0.1
|
| 892 |
-
},
|
| 893 |
-
"q14": {
|
| 894 |
-
"query": "What is a supply chain attack?",
|
| 895 |
-
"gold": [
|
| 896 |
-
"Supply_chain_attack"
|
| 897 |
-
],
|
| 898 |
-
"top5": [
|
| 899 |
-
"Supply_chain_attack",
|
| 900 |
-
"Supply_chain_management",
|
| 901 |
-
"Bullwhip_effect",
|
| 902 |
-
"Logistics",
|
| 903 |
-
"2020\u20132023_global_chip_shortage"
|
| 904 |
-
],
|
| 905 |
-
"ndcg@10": 1.0,
|
| 906 |
-
"recall@10": 1.0,
|
| 907 |
-
"precision@10": 0.1
|
| 908 |
-
},
|
| 909 |
-
"q15": {
|
| 910 |
-
"query": "How busy is the Port of Singapore?",
|
| 911 |
-
"gold": [
|
| 912 |
-
"Port_of_Singapore"
|
| 913 |
-
],
|
| 914 |
-
"top5": [
|
| 915 |
-
"Port_of_Singapore",
|
| 916 |
-
"Strait_of_Malacca",
|
| 917 |
-
"Port_of_Los_Angeles",
|
| 918 |
-
"Container_ship",
|
| 919 |
-
"2021_Suez_Canal_obstruction"
|
| 920 |
-
],
|
| 921 |
-
"ndcg@10": 1.0,
|
| 922 |
-
"recall@10": 1.0,
|
| 923 |
-
"precision@10": 0.1
|
| 924 |
-
},
|
| 925 |
-
"q16": {
|
| 926 |
-
"query": "Which strait is a narrow Indonesia-Malaysia chokepoint?",
|
| 927 |
-
"gold": [
|
| 928 |
-
"Strait_of_Malacca"
|
| 929 |
-
],
|
| 930 |
-
"top5": [
|
| 931 |
-
"Strait_of_Malacca",
|
| 932 |
-
"Strait_of_Hormuz",
|
| 933 |
-
"Bab-el-Mandeb",
|
| 934 |
-
"Port_of_Singapore",
|
| 935 |
-
"Suez_Canal"
|
| 936 |
-
],
|
| 937 |
-
"ndcg@10": 1.0,
|
| 938 |
-
"recall@10": 1.0,
|
| 939 |
-
"precision@10": 0.1
|
| 940 |
-
},
|
| 941 |
-
"q17": {
|
| 942 |
-
"query": "Which industry does the Baltic Dry Index track?",
|
| 943 |
-
"gold": [
|
| 944 |
-
"Baltic_Dry_Index"
|
| 945 |
-
],
|
| 946 |
-
"top5": [
|
| 947 |
-
"Baltic_Dry_Index",
|
| 948 |
-
"Inventory",
|
| 949 |
-
"Logistics",
|
| 950 |
-
"Semiconductor_industry",
|
| 951 |
-
"Enterprise_resource_planning"
|
| 952 |
-
],
|
| 953 |
-
"ndcg@10": 1.0,
|
| 954 |
-
"recall@10": 1.0,
|
| 955 |
-
"precision@10": 0.1
|
| 956 |
-
},
|
| 957 |
-
"q18": {
|
| 958 |
-
"query": "What function does a warehouse serve?",
|
| 959 |
-
"gold": [
|
| 960 |
-
"Warehouse"
|
| 961 |
-
],
|
| 962 |
-
"top5": [
|
| 963 |
-
"Warehouse",
|
| 964 |
-
"Inventory",
|
| 965 |
-
"Logistics",
|
| 966 |
-
"Supply_chain_management",
|
| 967 |
-
"Enterprise_resource_planning"
|
| 968 |
-
],
|
| 969 |
-
"ndcg@10": 1.0,
|
| 970 |
-
"recall@10": 1.0,
|
| 971 |
-
"precision@10": 0.1
|
| 972 |
-
},
|
| 973 |
-
"q19": {
|
| 974 |
-
"query": "What is a container ship's TEU?",
|
| 975 |
-
"gold": [
|
| 976 |
-
"Container_ship"
|
| 977 |
-
],
|
| 978 |
-
"top5": [
|
| 979 |
-
"Container_ship",
|
| 980 |
-
"Ever_Given",
|
| 981 |
-
"Inventory",
|
| 982 |
-
"2021_Suez_Canal_obstruction",
|
| 983 |
-
"Baltic_Dry_Index"
|
| 984 |
-
],
|
| 985 |
-
"ndcg@10": 1.0,
|
| 986 |
-
"recall@10": 1.0,
|
| 987 |
-
"precision@10": 0.1
|
| 988 |
-
},
|
| 989 |
-
"q20": {
|
| 990 |
-
"query": "What software replaces accounting + inventory + HR systems?",
|
| 991 |
-
"gold": [
|
| 992 |
-
"Enterprise_resource_planning"
|
| 993 |
-
],
|
| 994 |
-
"top5": [
|
| 995 |
-
"Enterprise_resource_planning",
|
| 996 |
-
"Inventory",
|
| 997 |
-
"Supply_chain_management",
|
| 998 |
-
"Logistics",
|
| 999 |
-
"Supply_chain_attack"
|
| 1000 |
-
],
|
| 1001 |
-
"ndcg@10": 1.0,
|
| 1002 |
-
"recall@10": 1.0,
|
| 1003 |
-
"precision@10": 0.1
|
| 1004 |
-
}
|
| 1005 |
-
}
|
| 1006 |
-
}
|
| 1007 |
-
},
|
| 1008 |
-
"public_ref_nfcorpus": {
|
| 1009 |
-
"mxbai-embed-large-v1": {
|
| 1010 |
-
"ndcg@10_nfcorpus": 0.386,
|
| 1011 |
-
"source": "MTEB retrieval leaderboard 2024"
|
| 1012 |
-
},
|
| 1013 |
-
"bge-m3": {
|
| 1014 |
-
"ndcg@10_nfcorpus": 0.357,
|
| 1015 |
-
"source": "BGE-M3 paper + MTEB"
|
| 1016 |
-
},
|
| 1017 |
-
"snowflake-arctic-l": {
|
| 1018 |
-
"ndcg@10_nfcorpus": 0.348,
|
| 1019 |
-
"source": "Snowflake Arctic paper"
|
| 1020 |
-
}
|
| 1021 |
-
},
|
| 1022 |
-
"elapsed_min": 1.861957597732544
|
| 1023 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "SupplyMind-crisis-retrieval-BEIR-style",
|
| 3 |
+
"task_description": "Manual BEIR-style retrieval eval on 26 Wikipedia crisis articles + 20 real supply-chain queries. Metrics match the public MTEB retrieval leaderboard (nDCG@10, R@10, P@10). This is an out-of-domain task (supply chain, not medical), but numbers provide a directional check that our embedders are consistent with their published leaderboard performance.",
|
| 4 |
+
"our_results": {
|
| 5 |
+
"mxbai-embed-large-v1": {
|
| 6 |
+
"embedder": "mxbai-embed-large-v1",
|
| 7 |
+
"mean_ndcg@10": 0.9597824382702198,
|
| 8 |
+
"mean_recall@10": 1.0,
|
| 9 |
+
"mean_precision@10": 0.12000000000000002,
|
| 10 |
+
"corpus_encoding_s": 12.996914148330688,
|
| 11 |
+
"n_queries": 20,
|
| 12 |
+
"per_query": {
|
| 13 |
+
"q1": {
|
| 14 |
+
"query": "What was the magnitude of the 2011 Tohoku earthquake?",
|
| 15 |
+
"gold": [
|
| 16 |
+
"2011_T\u014dhoku_earthquake_and_tsunami"
|
| 17 |
+
],
|
| 18 |
+
"top5": [
|
| 19 |
+
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 20 |
+
"Ever_Given",
|
| 21 |
+
"2020\u20132023_global_chip_shortage",
|
| 22 |
+
"Container_ship",
|
| 23 |
+
"Warehouse"
|
| 24 |
+
],
|
| 25 |
+
"ndcg@10": 1.0,
|
| 26 |
+
"recall@10": 1.0,
|
| 27 |
+
"precision@10": 0.1
|
| 28 |
+
},
|
| 29 |
+
"q2": {
|
| 30 |
+
"query": "How long was the Suez Canal blocked in 2021?",
|
| 31 |
+
"gold": [
|
| 32 |
+
"2021_Suez_Canal_obstruction",
|
| 33 |
+
"Ever_Given"
|
| 34 |
+
],
|
| 35 |
+
"top5": [
|
| 36 |
+
"2021_Suez_Canal_obstruction",
|
| 37 |
+
"Suez_Canal",
|
| 38 |
+
"Ever_Given",
|
| 39 |
+
"Red_Sea_crisis",
|
| 40 |
+
"Bab-el-Mandeb"
|
| 41 |
+
],
|
| 42 |
+
"ndcg@10": 0.9197207891481876,
|
| 43 |
+
"recall@10": 1.0,
|
| 44 |
+
"precision@10": 0.2
|
| 45 |
+
},
|
| 46 |
+
"q3": {
|
| 47 |
+
"query": "What caused the global semiconductor shortage?",
|
| 48 |
+
"gold": [
|
| 49 |
+
"2020\u20132023_global_chip_shortage"
|
| 50 |
+
],
|
| 51 |
+
"top5": [
|
| 52 |
+
"2020\u20132023_global_chip_shortage",
|
| 53 |
+
"Semiconductor_industry",
|
| 54 |
+
"TSMC",
|
| 55 |
+
"Bullwhip_effect",
|
| 56 |
+
"CHIPS_and_Science_Act"
|
| 57 |
+
],
|
| 58 |
+
"ndcg@10": 1.0,
|
| 59 |
+
"recall@10": 1.0,
|
| 60 |
+
"precision@10": 0.1
|
| 61 |
+
},
|
| 62 |
+
"q4": {
|
| 63 |
+
"query": "Why is the Strait of Hormuz strategically important?",
|
| 64 |
+
"gold": [
|
| 65 |
+
"Strait_of_Hormuz"
|
| 66 |
+
],
|
| 67 |
+
"top5": [
|
| 68 |
+
"Strait_of_Hormuz",
|
| 69 |
+
"Strait_of_Malacca",
|
| 70 |
+
"Bab-el-Mandeb",
|
| 71 |
+
"Suez_Canal",
|
| 72 |
+
"Port_of_Singapore"
|
| 73 |
+
],
|
| 74 |
+
"ndcg@10": 1.0,
|
| 75 |
+
"recall@10": 1.0,
|
| 76 |
+
"precision@10": 0.1
|
| 77 |
+
},
|
| 78 |
+
"q5": {
|
| 79 |
+
"query": "How do Houthis threaten Red Sea shipping?",
|
| 80 |
+
"gold": [
|
| 81 |
+
"Red_Sea_crisis",
|
| 82 |
+
"Bab-el-Mandeb"
|
| 83 |
+
],
|
| 84 |
+
"top5": [
|
| 85 |
+
"Red_Sea_crisis",
|
| 86 |
+
"2021_Suez_Canal_obstruction",
|
| 87 |
+
"Bab-el-Mandeb",
|
| 88 |
+
"Strait_of_Hormuz",
|
| 89 |
+
"Suez_Canal"
|
| 90 |
+
],
|
| 91 |
+
"ndcg@10": 0.9197207891481876,
|
| 92 |
+
"recall@10": 1.0,
|
| 93 |
+
"precision@10": 0.2
|
| 94 |
+
},
|
| 95 |
+
"q6": {
|
| 96 |
+
"query": "Which foundry dominates advanced chip production?",
|
| 97 |
+
"gold": [
|
| 98 |
+
"TSMC",
|
| 99 |
+
"Semiconductor_industry"
|
| 100 |
+
],
|
| 101 |
+
"top5": [
|
| 102 |
+
"TSMC",
|
| 103 |
+
"Semiconductor_industry",
|
| 104 |
+
"Foxconn",
|
| 105 |
+
"CHIPS_and_Science_Act",
|
| 106 |
+
"2020\u20132023_global_chip_shortage"
|
| 107 |
+
],
|
| 108 |
+
"ndcg@10": 1.0,
|
| 109 |
+
"recall@10": 1.0,
|
| 110 |
+
"precision@10": 0.2
|
| 111 |
+
},
|
| 112 |
+
"q7": {
|
| 113 |
+
"query": "What is the bullwhip effect?",
|
| 114 |
+
"gold": [
|
| 115 |
+
"Bullwhip_effect"
|
| 116 |
+
],
|
| 117 |
+
"top5": [
|
| 118 |
+
"Bullwhip_effect",
|
| 119 |
+
"Inventory",
|
| 120 |
+
"Supply_chain_management",
|
| 121 |
+
"Supply_chain_attack",
|
| 122 |
+
"2020\u20132023_global_chip_shortage"
|
| 123 |
+
],
|
| 124 |
+
"ndcg@10": 1.0,
|
| 125 |
+
"recall@10": 1.0,
|
| 126 |
+
"precision@10": 0.1
|
| 127 |
+
},
|
| 128 |
+
"q8": {
|
| 129 |
+
"query": "Which port congested during 2021 supply chain crisis?",
|
| 130 |
+
"gold": [
|
| 131 |
+
"Port_of_Los_Angeles"
|
| 132 |
+
],
|
| 133 |
+
"top5": [
|
| 134 |
+
"2021_Suez_Canal_obstruction",
|
| 135 |
+
"2020\u20132023_global_chip_shortage",
|
| 136 |
+
"Ever_Given",
|
| 137 |
+
"Port_of_Singapore",
|
| 138 |
+
"Container_ship"
|
| 139 |
+
],
|
| 140 |
+
"ndcg@10": 0.3562071871080222,
|
| 141 |
+
"recall@10": 1.0,
|
| 142 |
+
"precision@10": 0.1
|
| 143 |
+
},
|
| 144 |
+
"q9": {
|
| 145 |
+
"query": "What is the just-in-time manufacturing philosophy?",
|
| 146 |
+
"gold": [
|
| 147 |
+
"Just-in-time_manufacturing"
|
| 148 |
+
],
|
| 149 |
+
"top5": [
|
| 150 |
+
"Just-in-time_manufacturing",
|
| 151 |
+
"Inventory",
|
| 152 |
+
"Supply_chain_management",
|
| 153 |
+
"Logistics",
|
| 154 |
+
"Enterprise_resource_planning"
|
| 155 |
+
],
|
| 156 |
+
"ndcg@10": 1.0,
|
| 157 |
+
"recall@10": 1.0,
|
| 158 |
+
"precision@10": 0.1
|
| 159 |
+
},
|
| 160 |
+
"q10": {
|
| 161 |
+
"query": "What does the CHIPS Act allocate?",
|
| 162 |
+
"gold": [
|
| 163 |
+
"CHIPS_and_Science_Act"
|
| 164 |
+
],
|
| 165 |
+
"top5": [
|
| 166 |
+
"CHIPS_and_Science_Act",
|
| 167 |
+
"2020\u20132023_global_chip_shortage",
|
| 168 |
+
"Semiconductor_industry",
|
| 169 |
+
"TSMC",
|
| 170 |
+
"Inventory"
|
| 171 |
+
],
|
| 172 |
+
"ndcg@10": 1.0,
|
| 173 |
+
"recall@10": 1.0,
|
| 174 |
+
"precision@10": 0.1
|
| 175 |
+
},
|
| 176 |
+
"q11": {
|
| 177 |
+
"query": "Who is Foxconn's primary customer?",
|
| 178 |
+
"gold": [
|
| 179 |
+
"Foxconn"
|
| 180 |
+
],
|
| 181 |
+
"top5": [
|
| 182 |
+
"Foxconn",
|
| 183 |
+
"Semiconductor_industry",
|
| 184 |
+
"TSMC",
|
| 185 |
+
"Bullwhip_effect",
|
| 186 |
+
"Samsung_Electronics"
|
| 187 |
+
],
|
| 188 |
+
"ndcg@10": 1.0,
|
| 189 |
+
"recall@10": 1.0,
|
| 190 |
+
"precision@10": 0.1
|
| 191 |
+
},
|
| 192 |
+
"q12": {
|
| 193 |
+
"query": "Why did the Ever Given run aground?",
|
| 194 |
+
"gold": [
|
| 195 |
+
"Ever_Given",
|
| 196 |
+
"2021_Suez_Canal_obstruction"
|
| 197 |
+
],
|
| 198 |
+
"top5": [
|
| 199 |
+
"Ever_Given",
|
| 200 |
+
"2021_Suez_Canal_obstruction",
|
| 201 |
+
"Container_ship",
|
| 202 |
+
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 203 |
+
"Suez_Canal"
|
| 204 |
+
],
|
| 205 |
+
"ndcg@10": 1.0,
|
| 206 |
+
"recall@10": 1.0,
|
| 207 |
+
"precision@10": 0.2
|
| 208 |
+
},
|
| 209 |
+
"q13": {
|
| 210 |
+
"query": "What is safety stock?",
|
| 211 |
+
"gold": [
|
| 212 |
+
"Inventory"
|
| 213 |
+
],
|
| 214 |
+
"top5": [
|
| 215 |
+
"Inventory",
|
| 216 |
+
"Container_ship",
|
| 217 |
+
"Just-in-time_manufacturing",
|
| 218 |
+
"Bullwhip_effect",
|
| 219 |
+
"Warehouse"
|
| 220 |
+
],
|
| 221 |
+
"ndcg@10": 1.0,
|
| 222 |
+
"recall@10": 1.0,
|
| 223 |
+
"precision@10": 0.1
|
| 224 |
+
},
|
| 225 |
+
"q14": {
|
| 226 |
+
"query": "What is a supply chain attack?",
|
| 227 |
+
"gold": [
|
| 228 |
+
"Supply_chain_attack"
|
| 229 |
+
],
|
| 230 |
+
"top5": [
|
| 231 |
+
"Supply_chain_attack",
|
| 232 |
+
"Supply_chain_management",
|
| 233 |
+
"Bullwhip_effect",
|
| 234 |
+
"Logistics",
|
| 235 |
+
"Inventory"
|
| 236 |
+
],
|
| 237 |
+
"ndcg@10": 1.0,
|
| 238 |
+
"recall@10": 1.0,
|
| 239 |
+
"precision@10": 0.1
|
| 240 |
+
},
|
| 241 |
+
"q15": {
|
| 242 |
+
"query": "How busy is the Port of Singapore?",
|
| 243 |
+
"gold": [
|
| 244 |
+
"Port_of_Singapore"
|
| 245 |
+
],
|
| 246 |
+
"top5": [
|
| 247 |
+
"Port_of_Singapore",
|
| 248 |
+
"Strait_of_Malacca",
|
| 249 |
+
"Port_of_Los_Angeles",
|
| 250 |
+
"2021_Suez_Canal_obstruction",
|
| 251 |
+
"Container_ship"
|
| 252 |
+
],
|
| 253 |
+
"ndcg@10": 1.0,
|
| 254 |
+
"recall@10": 1.0,
|
| 255 |
+
"precision@10": 0.1
|
| 256 |
+
},
|
| 257 |
+
"q16": {
|
| 258 |
+
"query": "Which strait is a narrow Indonesia-Malaysia chokepoint?",
|
| 259 |
+
"gold": [
|
| 260 |
+
"Strait_of_Malacca"
|
| 261 |
+
],
|
| 262 |
+
"top5": [
|
| 263 |
+
"Strait_of_Malacca",
|
| 264 |
+
"Strait_of_Hormuz",
|
| 265 |
+
"Bab-el-Mandeb",
|
| 266 |
+
"Port_of_Singapore",
|
| 267 |
+
"Suez_Canal"
|
| 268 |
+
],
|
| 269 |
+
"ndcg@10": 1.0,
|
| 270 |
+
"recall@10": 1.0,
|
| 271 |
+
"precision@10": 0.1
|
| 272 |
+
},
|
| 273 |
+
"q17": {
|
| 274 |
+
"query": "Which industry does the Baltic Dry Index track?",
|
| 275 |
+
"gold": [
|
| 276 |
+
"Baltic_Dry_Index"
|
| 277 |
+
],
|
| 278 |
+
"top5": [
|
| 279 |
+
"Baltic_Dry_Index",
|
| 280 |
+
"Semiconductor_industry",
|
| 281 |
+
"Inventory",
|
| 282 |
+
"Container_ship",
|
| 283 |
+
"2020\u20132023_global_chip_shortage"
|
| 284 |
+
],
|
| 285 |
+
"ndcg@10": 1.0,
|
| 286 |
+
"recall@10": 1.0,
|
| 287 |
+
"precision@10": 0.1
|
| 288 |
+
},
|
| 289 |
+
"q18": {
|
| 290 |
+
"query": "What function does a warehouse serve?",
|
| 291 |
+
"gold": [
|
| 292 |
+
"Warehouse"
|
| 293 |
+
],
|
| 294 |
+
"top5": [
|
| 295 |
+
"Warehouse",
|
| 296 |
+
"Inventory",
|
| 297 |
+
"Logistics",
|
| 298 |
+
"Container_ship",
|
| 299 |
+
"Supply_chain_management"
|
| 300 |
+
],
|
| 301 |
+
"ndcg@10": 1.0,
|
| 302 |
+
"recall@10": 1.0,
|
| 303 |
+
"precision@10": 0.1
|
| 304 |
+
},
|
| 305 |
+
"q19": {
|
| 306 |
+
"query": "What is a container ship's TEU?",
|
| 307 |
+
"gold": [
|
| 308 |
+
"Container_ship"
|
| 309 |
+
],
|
| 310 |
+
"top5": [
|
| 311 |
+
"Container_ship",
|
| 312 |
+
"Ever_Given",
|
| 313 |
+
"2021_Suez_Canal_obstruction",
|
| 314 |
+
"Port_of_Singapore",
|
| 315 |
+
"Port_of_Los_Angeles"
|
| 316 |
+
],
|
| 317 |
+
"ndcg@10": 1.0,
|
| 318 |
+
"recall@10": 1.0,
|
| 319 |
+
"precision@10": 0.1
|
| 320 |
+
},
|
| 321 |
+
"q20": {
|
| 322 |
+
"query": "What software replaces accounting + inventory + HR systems?",
|
| 323 |
+
"gold": [
|
| 324 |
+
"Enterprise_resource_planning"
|
| 325 |
+
],
|
| 326 |
+
"top5": [
|
| 327 |
+
"Enterprise_resource_planning",
|
| 328 |
+
"Inventory",
|
| 329 |
+
"Just-in-time_manufacturing",
|
| 330 |
+
"Supply_chain_management",
|
| 331 |
+
"Logistics"
|
| 332 |
+
],
|
| 333 |
+
"ndcg@10": 1.0,
|
| 334 |
+
"recall@10": 1.0,
|
| 335 |
+
"precision@10": 0.1
|
| 336 |
+
}
|
| 337 |
+
}
|
| 338 |
+
},
|
| 339 |
+
"bge-m3": {
|
| 340 |
+
"embedder": "bge-m3",
|
| 341 |
+
"mean_ndcg@10": 0.967519867361079,
|
| 342 |
+
"mean_recall@10": 1.0,
|
| 343 |
+
"mean_precision@10": 0.12000000000000002,
|
| 344 |
+
"corpus_encoding_s": 43.88751459121704,
|
| 345 |
+
"n_queries": 20,
|
| 346 |
+
"per_query": {
|
| 347 |
+
"q1": {
|
| 348 |
+
"query": "What was the magnitude of the 2011 Tohoku earthquake?",
|
| 349 |
+
"gold": [
|
| 350 |
+
"2011_T\u014dhoku_earthquake_and_tsunami"
|
| 351 |
+
],
|
| 352 |
+
"top5": [
|
| 353 |
+
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 354 |
+
"Foxconn",
|
| 355 |
+
"Bab-el-Mandeb",
|
| 356 |
+
"Ever_Given",
|
| 357 |
+
"2020\u20132023_global_chip_shortage"
|
| 358 |
+
],
|
| 359 |
+
"ndcg@10": 1.0,
|
| 360 |
+
"recall@10": 1.0,
|
| 361 |
+
"precision@10": 0.1
|
| 362 |
+
},
|
| 363 |
+
"q2": {
|
| 364 |
+
"query": "How long was the Suez Canal blocked in 2021?",
|
| 365 |
+
"gold": [
|
| 366 |
+
"2021_Suez_Canal_obstruction",
|
| 367 |
+
"Ever_Given"
|
| 368 |
+
],
|
| 369 |
+
"top5": [
|
| 370 |
+
"2021_Suez_Canal_obstruction",
|
| 371 |
+
"Suez_Canal",
|
| 372 |
+
"Ever_Given",
|
| 373 |
+
"Bab-el-Mandeb",
|
| 374 |
+
"2020\u20132023_global_chip_shortage"
|
| 375 |
+
],
|
| 376 |
+
"ndcg@10": 0.9197207891481876,
|
| 377 |
+
"recall@10": 1.0,
|
| 378 |
+
"precision@10": 0.2
|
| 379 |
+
},
|
| 380 |
+
"q3": {
|
| 381 |
+
"query": "What caused the global semiconductor shortage?",
|
| 382 |
+
"gold": [
|
| 383 |
+
"2020\u20132023_global_chip_shortage"
|
| 384 |
+
],
|
| 385 |
+
"top5": [
|
| 386 |
+
"2020\u20132023_global_chip_shortage",
|
| 387 |
+
"Semiconductor_industry",
|
| 388 |
+
"TSMC",
|
| 389 |
+
"Samsung_Electronics",
|
| 390 |
+
"Foxconn"
|
| 391 |
+
],
|
| 392 |
+
"ndcg@10": 1.0,
|
| 393 |
+
"recall@10": 1.0,
|
| 394 |
+
"precision@10": 0.1
|
| 395 |
+
},
|
| 396 |
+
"q4": {
|
| 397 |
+
"query": "Why is the Strait of Hormuz strategically important?",
|
| 398 |
+
"gold": [
|
| 399 |
+
"Strait_of_Hormuz"
|
| 400 |
+
],
|
| 401 |
+
"top5": [
|
| 402 |
+
"Strait_of_Hormuz",
|
| 403 |
+
"Bab-el-Mandeb",
|
| 404 |
+
"Strait_of_Malacca",
|
| 405 |
+
"Suez_Canal",
|
| 406 |
+
"Red_Sea_crisis"
|
| 407 |
+
],
|
| 408 |
+
"ndcg@10": 1.0,
|
| 409 |
+
"recall@10": 1.0,
|
| 410 |
+
"precision@10": 0.1
|
| 411 |
+
},
|
| 412 |
+
"q5": {
|
| 413 |
+
"query": "How do Houthis threaten Red Sea shipping?",
|
| 414 |
+
"gold": [
|
| 415 |
+
"Red_Sea_crisis",
|
| 416 |
+
"Bab-el-Mandeb"
|
| 417 |
+
],
|
| 418 |
+
"top5": [
|
| 419 |
+
"Red_Sea_crisis",
|
| 420 |
+
"Bab-el-Mandeb",
|
| 421 |
+
"Suez_Canal",
|
| 422 |
+
"2021_Suez_Canal_obstruction",
|
| 423 |
+
"Ever_Given"
|
| 424 |
+
],
|
| 425 |
+
"ndcg@10": 1.0,
|
| 426 |
+
"recall@10": 1.0,
|
| 427 |
+
"precision@10": 0.2
|
| 428 |
+
},
|
| 429 |
+
"q6": {
|
| 430 |
+
"query": "Which foundry dominates advanced chip production?",
|
| 431 |
+
"gold": [
|
| 432 |
+
"TSMC",
|
| 433 |
+
"Semiconductor_industry"
|
| 434 |
+
],
|
| 435 |
+
"top5": [
|
| 436 |
+
"Semiconductor_industry",
|
| 437 |
+
"TSMC",
|
| 438 |
+
"Foxconn",
|
| 439 |
+
"2020\u20132023_global_chip_shortage",
|
| 440 |
+
"Samsung_Electronics"
|
| 441 |
+
],
|
| 442 |
+
"ndcg@10": 1.0,
|
| 443 |
+
"recall@10": 1.0,
|
| 444 |
+
"precision@10": 0.2
|
| 445 |
+
},
|
| 446 |
+
"q7": {
|
| 447 |
+
"query": "What is the bullwhip effect?",
|
| 448 |
+
"gold": [
|
| 449 |
+
"Bullwhip_effect"
|
| 450 |
+
],
|
| 451 |
+
"top5": [
|
| 452 |
+
"Bullwhip_effect",
|
| 453 |
+
"2020\u20132023_global_chip_shortage",
|
| 454 |
+
"Baltic_Dry_Index",
|
| 455 |
+
"Bab-el-Mandeb",
|
| 456 |
+
"Just-in-time_manufacturing"
|
| 457 |
+
],
|
| 458 |
+
"ndcg@10": 1.0,
|
| 459 |
+
"recall@10": 1.0,
|
| 460 |
+
"precision@10": 0.1
|
| 461 |
+
},
|
| 462 |
+
"q8": {
|
| 463 |
+
"query": "Which port congested during 2021 supply chain crisis?",
|
| 464 |
+
"gold": [
|
| 465 |
+
"Port_of_Los_Angeles"
|
| 466 |
+
],
|
| 467 |
+
"top5": [
|
| 468 |
+
"2020\u20132023_global_chip_shortage",
|
| 469 |
+
"2021_Suez_Canal_obstruction",
|
| 470 |
+
"Ever_Given",
|
| 471 |
+
"Port_of_Los_Angeles",
|
| 472 |
+
"Bab-el-Mandeb"
|
| 473 |
+
],
|
| 474 |
+
"ndcg@10": 0.43067655807339306,
|
| 475 |
+
"recall@10": 1.0,
|
| 476 |
+
"precision@10": 0.1
|
| 477 |
+
},
|
| 478 |
+
"q9": {
|
| 479 |
+
"query": "What is the just-in-time manufacturing philosophy?",
|
| 480 |
+
"gold": [
|
| 481 |
+
"Just-in-time_manufacturing"
|
| 482 |
+
],
|
| 483 |
+
"top5": [
|
| 484 |
+
"Just-in-time_manufacturing",
|
| 485 |
+
"Inventory",
|
| 486 |
+
"Supply_chain_management",
|
| 487 |
+
"Foxconn",
|
| 488 |
+
"Logistics"
|
| 489 |
+
],
|
| 490 |
+
"ndcg@10": 1.0,
|
| 491 |
+
"recall@10": 1.0,
|
| 492 |
+
"precision@10": 0.1
|
| 493 |
+
},
|
| 494 |
+
"q10": {
|
| 495 |
+
"query": "What does the CHIPS Act allocate?",
|
| 496 |
+
"gold": [
|
| 497 |
+
"CHIPS_and_Science_Act"
|
| 498 |
+
],
|
| 499 |
+
"top5": [
|
| 500 |
+
"CHIPS_and_Science_Act",
|
| 501 |
+
"2020\u20132023_global_chip_shortage",
|
| 502 |
+
"TSMC",
|
| 503 |
+
"Foxconn",
|
| 504 |
+
"Supply_chain_attack"
|
| 505 |
+
],
|
| 506 |
+
"ndcg@10": 1.0,
|
| 507 |
+
"recall@10": 1.0,
|
| 508 |
+
"precision@10": 0.1
|
| 509 |
+
},
|
| 510 |
+
"q11": {
|
| 511 |
+
"query": "Who is Foxconn's primary customer?",
|
| 512 |
+
"gold": [
|
| 513 |
+
"Foxconn"
|
| 514 |
+
],
|
| 515 |
+
"top5": [
|
| 516 |
+
"Foxconn",
|
| 517 |
+
"TSMC",
|
| 518 |
+
"Semiconductor_industry",
|
| 519 |
+
"Ever_Given",
|
| 520 |
+
"2021_Suez_Canal_obstruction"
|
| 521 |
+
],
|
| 522 |
+
"ndcg@10": 1.0,
|
| 523 |
+
"recall@10": 1.0,
|
| 524 |
+
"precision@10": 0.1
|
| 525 |
+
},
|
| 526 |
+
"q12": {
|
| 527 |
+
"query": "Why did the Ever Given run aground?",
|
| 528 |
+
"gold": [
|
| 529 |
+
"Ever_Given",
|
| 530 |
+
"2021_Suez_Canal_obstruction"
|
| 531 |
+
],
|
| 532 |
+
"top5": [
|
| 533 |
+
"Ever_Given",
|
| 534 |
+
"2021_Suez_Canal_obstruction",
|
| 535 |
+
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 536 |
+
"Bab-el-Mandeb",
|
| 537 |
+
"2020\u20132023_global_chip_shortage"
|
| 538 |
+
],
|
| 539 |
+
"ndcg@10": 1.0,
|
| 540 |
+
"recall@10": 1.0,
|
| 541 |
+
"precision@10": 0.2
|
| 542 |
+
},
|
| 543 |
+
"q13": {
|
| 544 |
+
"query": "What is safety stock?",
|
| 545 |
+
"gold": [
|
| 546 |
+
"Inventory"
|
| 547 |
+
],
|
| 548 |
+
"top5": [
|
| 549 |
+
"Inventory",
|
| 550 |
+
"Supply_chain_attack",
|
| 551 |
+
"TSMC",
|
| 552 |
+
"Warehouse",
|
| 553 |
+
"Port_of_Singapore"
|
| 554 |
+
],
|
| 555 |
+
"ndcg@10": 1.0,
|
| 556 |
+
"recall@10": 1.0,
|
| 557 |
+
"precision@10": 0.1
|
| 558 |
+
},
|
| 559 |
+
"q14": {
|
| 560 |
+
"query": "What is a supply chain attack?",
|
| 561 |
+
"gold": [
|
| 562 |
+
"Supply_chain_attack"
|
| 563 |
+
],
|
| 564 |
+
"top5": [
|
| 565 |
+
"Supply_chain_attack",
|
| 566 |
+
"Supply_chain_management",
|
| 567 |
+
"Bullwhip_effect",
|
| 568 |
+
"2020\u20132023_global_chip_shortage",
|
| 569 |
+
"Logistics"
|
| 570 |
+
],
|
| 571 |
+
"ndcg@10": 1.0,
|
| 572 |
+
"recall@10": 1.0,
|
| 573 |
+
"precision@10": 0.1
|
| 574 |
+
},
|
| 575 |
+
"q15": {
|
| 576 |
+
"query": "How busy is the Port of Singapore?",
|
| 577 |
+
"gold": [
|
| 578 |
+
"Port_of_Singapore"
|
| 579 |
+
],
|
| 580 |
+
"top5": [
|
| 581 |
+
"Port_of_Singapore",
|
| 582 |
+
"Port_of_Los_Angeles",
|
| 583 |
+
"Strait_of_Malacca",
|
| 584 |
+
"2021_Suez_Canal_obstruction",
|
| 585 |
+
"Container_ship"
|
| 586 |
+
],
|
| 587 |
+
"ndcg@10": 1.0,
|
| 588 |
+
"recall@10": 1.0,
|
| 589 |
+
"precision@10": 0.1
|
| 590 |
+
},
|
| 591 |
+
"q16": {
|
| 592 |
+
"query": "Which strait is a narrow Indonesia-Malaysia chokepoint?",
|
| 593 |
+
"gold": [
|
| 594 |
+
"Strait_of_Malacca"
|
| 595 |
+
],
|
| 596 |
+
"top5": [
|
| 597 |
+
"Strait_of_Malacca",
|
| 598 |
+
"Bab-el-Mandeb",
|
| 599 |
+
"Strait_of_Hormuz",
|
| 600 |
+
"Port_of_Singapore",
|
| 601 |
+
"Suez_Canal"
|
| 602 |
+
],
|
| 603 |
+
"ndcg@10": 1.0,
|
| 604 |
+
"recall@10": 1.0,
|
| 605 |
+
"precision@10": 0.1
|
| 606 |
+
},
|
| 607 |
+
"q17": {
|
| 608 |
+
"query": "Which industry does the Baltic Dry Index track?",
|
| 609 |
+
"gold": [
|
| 610 |
+
"Baltic_Dry_Index"
|
| 611 |
+
],
|
| 612 |
+
"top5": [
|
| 613 |
+
"Baltic_Dry_Index",
|
| 614 |
+
"Inventory",
|
| 615 |
+
"2020\u20132023_global_chip_shortage",
|
| 616 |
+
"Semiconductor_industry",
|
| 617 |
+
"Logistics"
|
| 618 |
+
],
|
| 619 |
+
"ndcg@10": 1.0,
|
| 620 |
+
"recall@10": 1.0,
|
| 621 |
+
"precision@10": 0.1
|
| 622 |
+
},
|
| 623 |
+
"q18": {
|
| 624 |
+
"query": "What function does a warehouse serve?",
|
| 625 |
+
"gold": [
|
| 626 |
+
"Warehouse"
|
| 627 |
+
],
|
| 628 |
+
"top5": [
|
| 629 |
+
"Warehouse",
|
| 630 |
+
"Inventory",
|
| 631 |
+
"Logistics",
|
| 632 |
+
"Container_ship",
|
| 633 |
+
"Port_of_Singapore"
|
| 634 |
+
],
|
| 635 |
+
"ndcg@10": 1.0,
|
| 636 |
+
"recall@10": 1.0,
|
| 637 |
+
"precision@10": 0.1
|
| 638 |
+
},
|
| 639 |
+
"q19": {
|
| 640 |
+
"query": "What is a container ship's TEU?",
|
| 641 |
+
"gold": [
|
| 642 |
+
"Container_ship"
|
| 643 |
+
],
|
| 644 |
+
"top5": [
|
| 645 |
+
"Container_ship",
|
| 646 |
+
"Ever_Given",
|
| 647 |
+
"2021_Suez_Canal_obstruction",
|
| 648 |
+
"Baltic_Dry_Index",
|
| 649 |
+
"Port_of_Singapore"
|
| 650 |
+
],
|
| 651 |
+
"ndcg@10": 1.0,
|
| 652 |
+
"recall@10": 1.0,
|
| 653 |
+
"precision@10": 0.1
|
| 654 |
+
},
|
| 655 |
+
"q20": {
|
| 656 |
+
"query": "What software replaces accounting + inventory + HR systems?",
|
| 657 |
+
"gold": [
|
| 658 |
+
"Enterprise_resource_planning"
|
| 659 |
+
],
|
| 660 |
+
"top5": [
|
| 661 |
+
"Enterprise_resource_planning",
|
| 662 |
+
"Inventory",
|
| 663 |
+
"Supply_chain_attack",
|
| 664 |
+
"Just-in-time_manufacturing",
|
| 665 |
+
"Foxconn"
|
| 666 |
+
],
|
| 667 |
+
"ndcg@10": 1.0,
|
| 668 |
+
"recall@10": 1.0,
|
| 669 |
+
"precision@10": 0.1
|
| 670 |
+
}
|
| 671 |
+
}
|
| 672 |
+
},
|
| 673 |
+
"snowflake-arctic-l": {
|
| 674 |
+
"embedder": "snowflake-arctic-l",
|
| 675 |
+
"mean_ndcg@10": 0.9709860394574094,
|
| 676 |
+
"mean_recall@10": 1.0,
|
| 677 |
+
"mean_precision@10": 0.12000000000000002,
|
| 678 |
+
"corpus_encoding_s": 40.3898344039917,
|
| 679 |
+
"n_queries": 20,
|
| 680 |
+
"per_query": {
|
| 681 |
+
"q1": {
|
| 682 |
+
"query": "What was the magnitude of the 2011 Tohoku earthquake?",
|
| 683 |
+
"gold": [
|
| 684 |
+
"2011_T\u014dhoku_earthquake_and_tsunami"
|
| 685 |
+
],
|
| 686 |
+
"top5": [
|
| 687 |
+
"2011_T\u014dhoku_earthquake_and_tsunami",
|
| 688 |
+
"Ever_Given",
|
| 689 |
+
"2021_Suez_Canal_obstruction",
|
| 690 |
+
"Samsung_Electronics",
|
| 691 |
+
"Suez_Canal"
|
| 692 |
+
],
|
| 693 |
+
"ndcg@10": 1.0,
|
| 694 |
+
"recall@10": 1.0,
|
| 695 |
+
"precision@10": 0.1
|
| 696 |
+
},
|
| 697 |
+
"q2": {
|
| 698 |
+
"query": "How long was the Suez Canal blocked in 2021?",
|
| 699 |
+
"gold": [
|
| 700 |
+
"2021_Suez_Canal_obstruction",
|
| 701 |
+
"Ever_Given"
|
| 702 |
+
],
|
| 703 |
+
"top5": [
|
| 704 |
+
"2021_Suez_Canal_obstruction",
|
| 705 |
+
"Suez_Canal",
|
| 706 |
+
"Ever_Given",
|
| 707 |
+
"Red_Sea_crisis",
|
| 708 |
+
"Bab-el-Mandeb"
|
| 709 |
+
],
|
| 710 |
+
"ndcg@10": 0.9197207891481876,
|
| 711 |
+
"recall@10": 1.0,
|
| 712 |
+
"precision@10": 0.2
|
| 713 |
+
},
|
| 714 |
+
"q3": {
|
| 715 |
+
"query": "What caused the global semiconductor shortage?",
|
| 716 |
+
"gold": [
|
| 717 |
+
"2020\u20132023_global_chip_shortage"
|
| 718 |
+
],
|
| 719 |
+
"top5": [
|
| 720 |
+
"2020\u20132023_global_chip_shortage",
|
| 721 |
+
"Semiconductor_industry",
|
| 722 |
+
"TSMC",
|
| 723 |
+
"Supply_chain_attack",
|
| 724 |
+
"Foxconn"
|
| 725 |
+
],
|
| 726 |
+
"ndcg@10": 1.0,
|
| 727 |
+
"recall@10": 1.0,
|
| 728 |
+
"precision@10": 0.1
|
| 729 |
+
},
|
| 730 |
+
"q4": {
|
| 731 |
+
"query": "Why is the Strait of Hormuz strategically important?",
|
| 732 |
+
"gold": [
|
| 733 |
+
"Strait_of_Hormuz"
|
| 734 |
+
],
|
| 735 |
+
"top5": [
|
| 736 |
+
"Strait_of_Hormuz",
|
| 737 |
+
"Strait_of_Malacca",
|
| 738 |
+
"Bab-el-Mandeb",
|
| 739 |
+
"Suez_Canal",
|
| 740 |
+
"Red_Sea_crisis"
|
| 741 |
+
],
|
| 742 |
+
"ndcg@10": 1.0,
|
| 743 |
+
"recall@10": 1.0,
|
| 744 |
+
"precision@10": 0.1
|
| 745 |
+
},
|
| 746 |
+
"q5": {
|
| 747 |
+
"query": "How do Houthis threaten Red Sea shipping?",
|
| 748 |
+
"gold": [
|
| 749 |
+
"Red_Sea_crisis",
|
| 750 |
+
"Bab-el-Mandeb"
|
| 751 |
+
],
|
| 752 |
+
"top5": [
|
| 753 |
+
"Red_Sea_crisis",
|
| 754 |
+
"Bab-el-Mandeb",
|
| 755 |
+
"Strait_of_Hormuz",
|
| 756 |
+
"Suez_Canal",
|
| 757 |
+
"2021_Suez_Canal_obstruction"
|
| 758 |
+
],
|
| 759 |
+
"ndcg@10": 1.0,
|
| 760 |
+
"recall@10": 1.0,
|
| 761 |
+
"precision@10": 0.2
|
| 762 |
+
},
|
| 763 |
+
"q6": {
|
| 764 |
+
"query": "Which foundry dominates advanced chip production?",
|
| 765 |
+
"gold": [
|
| 766 |
+
"TSMC",
|
| 767 |
+
"Semiconductor_industry"
|
| 768 |
+
],
|
| 769 |
+
"top5": [
|
| 770 |
+
"Semiconductor_industry",
|
| 771 |
+
"TSMC",
|
| 772 |
+
"2020\u20132023_global_chip_shortage",
|
| 773 |
+
"Foxconn",
|
| 774 |
+
"CHIPS_and_Science_Act"
|
| 775 |
+
],
|
| 776 |
+
"ndcg@10": 1.0,
|
| 777 |
+
"recall@10": 1.0,
|
| 778 |
+
"precision@10": 0.2
|
| 779 |
+
},
|
| 780 |
+
"q7": {
|
| 781 |
+
"query": "What is the bullwhip effect?",
|
| 782 |
+
"gold": [
|
| 783 |
+
"Bullwhip_effect"
|
| 784 |
+
],
|
| 785 |
+
"top5": [
|
| 786 |
+
"Bullwhip_effect",
|
| 787 |
+
"Just-in-time_manufacturing",
|
| 788 |
+
"Baltic_Dry_Index",
|
| 789 |
+
"Inventory",
|
| 790 |
+
"Bab-el-Mandeb"
|
| 791 |
+
],
|
| 792 |
+
"ndcg@10": 1.0,
|
| 793 |
+
"recall@10": 1.0,
|
| 794 |
+
"precision@10": 0.1
|
| 795 |
+
},
|
| 796 |
+
"q8": {
|
| 797 |
+
"query": "Which port congested during 2021 supply chain crisis?",
|
| 798 |
+
"gold": [
|
| 799 |
+
"Port_of_Los_Angeles"
|
| 800 |
+
],
|
| 801 |
+
"top5": [
|
| 802 |
+
"2020\u20132023_global_chip_shortage",
|
| 803 |
+
"2021_Suez_Canal_obstruction",
|
| 804 |
+
"Port_of_Los_Angeles",
|
| 805 |
+
"Ever_Given",
|
| 806 |
+
"Supply_chain_attack"
|
| 807 |
+
],
|
| 808 |
+
"ndcg@10": 0.5,
|
| 809 |
+
"recall@10": 1.0,
|
| 810 |
+
"precision@10": 0.1
|
| 811 |
+
},
|
| 812 |
+
"q9": {
|
| 813 |
+
"query": "What is the just-in-time manufacturing philosophy?",
|
| 814 |
+
"gold": [
|
| 815 |
+
"Just-in-time_manufacturing"
|
| 816 |
+
],
|
| 817 |
+
"top5": [
|
| 818 |
+
"Just-in-time_manufacturing",
|
| 819 |
+
"Supply_chain_management",
|
| 820 |
+
"Inventory",
|
| 821 |
+
"Logistics",
|
| 822 |
+
"Semiconductor_industry"
|
| 823 |
+
],
|
| 824 |
+
"ndcg@10": 1.0,
|
| 825 |
+
"recall@10": 1.0,
|
| 826 |
+
"precision@10": 0.1
|
| 827 |
+
},
|
| 828 |
+
"q10": {
|
| 829 |
+
"query": "What does the CHIPS Act allocate?",
|
| 830 |
+
"gold": [
|
| 831 |
+
"CHIPS_and_Science_Act"
|
| 832 |
+
],
|
| 833 |
+
"top5": [
|
| 834 |
+
"CHIPS_and_Science_Act",
|
| 835 |
+
"2020\u20132023_global_chip_shortage",
|
| 836 |
+
"Semiconductor_industry",
|
| 837 |
+
"TSMC",
|
| 838 |
+
"Supply_chain_attack"
|
| 839 |
+
],
|
| 840 |
+
"ndcg@10": 1.0,
|
| 841 |
+
"recall@10": 1.0,
|
| 842 |
+
"precision@10": 0.1
|
| 843 |
+
},
|
| 844 |
+
"q11": {
|
| 845 |
+
"query": "Who is Foxconn's primary customer?",
|
| 846 |
+
"gold": [
|
| 847 |
+
"Foxconn"
|
| 848 |
+
],
|
| 849 |
+
"top5": [
|
| 850 |
+
"Foxconn",
|
| 851 |
+
"TSMC",
|
| 852 |
+
"Semiconductor_industry",
|
| 853 |
+
"2020\u20132023_global_chip_shortage",
|
| 854 |
+
"Supply_chain_management"
|
| 855 |
+
],
|
| 856 |
+
"ndcg@10": 1.0,
|
| 857 |
+
"recall@10": 1.0,
|
| 858 |
+
"precision@10": 0.1
|
| 859 |
+
},
|
| 860 |
+
"q12": {
|
| 861 |
+
"query": "Why did the Ever Given run aground?",
|
| 862 |
+
"gold": [
|
| 863 |
+
"Ever_Given",
|
| 864 |
+
"2021_Suez_Canal_obstruction"
|
| 865 |
+
],
|
| 866 |
+
"top5": [
|
| 867 |
+
"Ever_Given",
|
| 868 |
+
"2021_Suez_Canal_obstruction",
|
| 869 |
+
"Bab-el-Mandeb",
|
| 870 |
+
"Strait_of_Hormuz",
|
| 871 |
+
"Container_ship"
|
| 872 |
+
],
|
| 873 |
+
"ndcg@10": 1.0,
|
| 874 |
+
"recall@10": 1.0,
|
| 875 |
+
"precision@10": 0.2
|
| 876 |
+
},
|
| 877 |
+
"q13": {
|
| 878 |
+
"query": "What is safety stock?",
|
| 879 |
+
"gold": [
|
| 880 |
+
"Inventory"
|
| 881 |
+
],
|
| 882 |
+
"top5": [
|
| 883 |
+
"Inventory",
|
| 884 |
+
"Supply_chain_attack",
|
| 885 |
+
"Bullwhip_effect",
|
| 886 |
+
"Logistics",
|
| 887 |
+
"Baltic_Dry_Index"
|
| 888 |
+
],
|
| 889 |
+
"ndcg@10": 1.0,
|
| 890 |
+
"recall@10": 1.0,
|
| 891 |
+
"precision@10": 0.1
|
| 892 |
+
},
|
| 893 |
+
"q14": {
|
| 894 |
+
"query": "What is a supply chain attack?",
|
| 895 |
+
"gold": [
|
| 896 |
+
"Supply_chain_attack"
|
| 897 |
+
],
|
| 898 |
+
"top5": [
|
| 899 |
+
"Supply_chain_attack",
|
| 900 |
+
"Supply_chain_management",
|
| 901 |
+
"Bullwhip_effect",
|
| 902 |
+
"Logistics",
|
| 903 |
+
"2020\u20132023_global_chip_shortage"
|
| 904 |
+
],
|
| 905 |
+
"ndcg@10": 1.0,
|
| 906 |
+
"recall@10": 1.0,
|
| 907 |
+
"precision@10": 0.1
|
| 908 |
+
},
|
| 909 |
+
"q15": {
|
| 910 |
+
"query": "How busy is the Port of Singapore?",
|
| 911 |
+
"gold": [
|
| 912 |
+
"Port_of_Singapore"
|
| 913 |
+
],
|
| 914 |
+
"top5": [
|
| 915 |
+
"Port_of_Singapore",
|
| 916 |
+
"Strait_of_Malacca",
|
| 917 |
+
"Port_of_Los_Angeles",
|
| 918 |
+
"Container_ship",
|
| 919 |
+
"2021_Suez_Canal_obstruction"
|
| 920 |
+
],
|
| 921 |
+
"ndcg@10": 1.0,
|
| 922 |
+
"recall@10": 1.0,
|
| 923 |
+
"precision@10": 0.1
|
| 924 |
+
},
|
| 925 |
+
"q16": {
|
| 926 |
+
"query": "Which strait is a narrow Indonesia-Malaysia chokepoint?",
|
| 927 |
+
"gold": [
|
| 928 |
+
"Strait_of_Malacca"
|
| 929 |
+
],
|
| 930 |
+
"top5": [
|
| 931 |
+
"Strait_of_Malacca",
|
| 932 |
+
"Strait_of_Hormuz",
|
| 933 |
+
"Bab-el-Mandeb",
|
| 934 |
+
"Port_of_Singapore",
|
| 935 |
+
"Suez_Canal"
|
| 936 |
+
],
|
| 937 |
+
"ndcg@10": 1.0,
|
| 938 |
+
"recall@10": 1.0,
|
| 939 |
+
"precision@10": 0.1
|
| 940 |
+
},
|
| 941 |
+
"q17": {
|
| 942 |
+
"query": "Which industry does the Baltic Dry Index track?",
|
| 943 |
+
"gold": [
|
| 944 |
+
"Baltic_Dry_Index"
|
| 945 |
+
],
|
| 946 |
+
"top5": [
|
| 947 |
+
"Baltic_Dry_Index",
|
| 948 |
+
"Inventory",
|
| 949 |
+
"Logistics",
|
| 950 |
+
"Semiconductor_industry",
|
| 951 |
+
"Enterprise_resource_planning"
|
| 952 |
+
],
|
| 953 |
+
"ndcg@10": 1.0,
|
| 954 |
+
"recall@10": 1.0,
|
| 955 |
+
"precision@10": 0.1
|
| 956 |
+
},
|
| 957 |
+
"q18": {
|
| 958 |
+
"query": "What function does a warehouse serve?",
|
| 959 |
+
"gold": [
|
| 960 |
+
"Warehouse"
|
| 961 |
+
],
|
| 962 |
+
"top5": [
|
| 963 |
+
"Warehouse",
|
| 964 |
+
"Inventory",
|
| 965 |
+
"Logistics",
|
| 966 |
+
"Supply_chain_management",
|
| 967 |
+
"Enterprise_resource_planning"
|
| 968 |
+
],
|
| 969 |
+
"ndcg@10": 1.0,
|
| 970 |
+
"recall@10": 1.0,
|
| 971 |
+
"precision@10": 0.1
|
| 972 |
+
},
|
| 973 |
+
"q19": {
|
| 974 |
+
"query": "What is a container ship's TEU?",
|
| 975 |
+
"gold": [
|
| 976 |
+
"Container_ship"
|
| 977 |
+
],
|
| 978 |
+
"top5": [
|
| 979 |
+
"Container_ship",
|
| 980 |
+
"Ever_Given",
|
| 981 |
+
"Inventory",
|
| 982 |
+
"2021_Suez_Canal_obstruction",
|
| 983 |
+
"Baltic_Dry_Index"
|
| 984 |
+
],
|
| 985 |
+
"ndcg@10": 1.0,
|
| 986 |
+
"recall@10": 1.0,
|
| 987 |
+
"precision@10": 0.1
|
| 988 |
+
},
|
| 989 |
+
"q20": {
|
| 990 |
+
"query": "What software replaces accounting + inventory + HR systems?",
|
| 991 |
+
"gold": [
|
| 992 |
+
"Enterprise_resource_planning"
|
| 993 |
+
],
|
| 994 |
+
"top5": [
|
| 995 |
+
"Enterprise_resource_planning",
|
| 996 |
+
"Inventory",
|
| 997 |
+
"Supply_chain_management",
|
| 998 |
+
"Logistics",
|
| 999 |
+
"Supply_chain_attack"
|
| 1000 |
+
],
|
| 1001 |
+
"ndcg@10": 1.0,
|
| 1002 |
+
"recall@10": 1.0,
|
| 1003 |
+
"precision@10": 0.1
|
| 1004 |
+
}
|
| 1005 |
+
}
|
| 1006 |
+
}
|
| 1007 |
+
},
|
| 1008 |
+
"public_ref_nfcorpus": {
|
| 1009 |
+
"mxbai-embed-large-v1": {
|
| 1010 |
+
"ndcg@10_nfcorpus": 0.386,
|
| 1011 |
+
"source": "MTEB retrieval leaderboard 2024"
|
| 1012 |
+
},
|
| 1013 |
+
"bge-m3": {
|
| 1014 |
+
"ndcg@10_nfcorpus": 0.357,
|
| 1015 |
+
"source": "BGE-M3 paper + MTEB"
|
| 1016 |
+
},
|
| 1017 |
+
"snowflake-arctic-l": {
|
| 1018 |
+
"ndcg@10_nfcorpus": 0.348,
|
| 1019 |
+
"source": "Snowflake Arctic paper"
|
| 1020 |
+
}
|
| 1021 |
+
},
|
| 1022 |
+
"elapsed_min": 1.861957597732544
|
| 1023 |
}
|
FINAL_SUBMIT/receipts/R5_GRANITE.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
FINAL_SUBMIT/receipts/R5_GRANITE_HARD.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
FINAL_SUBMIT/receipts/R6_ALGO_COMPARISON.json
CHANGED
|
@@ -1,72 +1,72 @@
|
|
| 1 |
-
{
|
| 2 |
-
"task": "easy_typhoon_response",
|
| 3 |
-
"training_timesteps": 100000,
|
| 4 |
-
"eval_episodes": 50,
|
| 5 |
-
"per_algorithm": {
|
| 6 |
-
"MaskablePPO": {
|
| 7 |
-
"algorithm": "MaskablePPO",
|
| 8 |
-
"n_episodes": 50,
|
| 9 |
-
"reward_mean": 1.2005000000000001,
|
| 10 |
-
"reward_std": 0.19939637032804786,
|
| 11 |
-
"reward_min": 0.643,
|
| 12 |
-
"reward_max": 1.3435000000000004,
|
| 13 |
-
"length_mean": 20.0,
|
| 14 |
-
"violations_mean": 0.0,
|
| 15 |
-
"invalid_action_picks_mean_per_ep": 0.0
|
| 16 |
-
},
|
| 17 |
-
"PPO": {
|
| 18 |
-
"algorithm": "PPO",
|
| 19 |
-
"n_episodes": 50,
|
| 20 |
-
"reward_mean": 0.9470000000000001,
|
| 21 |
-
"reward_std": 0.1244727781484771,
|
| 22 |
-
"reward_min": 0.5895,
|
| 23 |
-
"reward_max": 1.0760000000000003,
|
| 24 |
-
"length_mean": 20.0,
|
| 25 |
-
"violations_mean": 0.0,
|
| 26 |
-
"invalid_action_picks_mean_per_ep": 13.64
|
| 27 |
-
},
|
| 28 |
-
"A2C": {
|
| 29 |
-
"algorithm": "A2C",
|
| 30 |
-
"n_episodes": 50,
|
| 31 |
-
"reward_mean": 0.8738700000000001,
|
| 32 |
-
"reward_std": 0.11796597221232909,
|
| 33 |
-
"reward_min": 0.5359999999999999,
|
| 34 |
-
"reward_max": 0.9690000000000002,
|
| 35 |
-
"length_mean": 20.0,
|
| 36 |
-
"violations_mean": 0.0,
|
| 37 |
-
"invalid_action_picks_mean_per_ep": 13.88
|
| 38 |
-
},
|
| 39 |
-
"RecurrentPPO": {
|
| 40 |
-
"algorithm": "RecurrentPPO",
|
| 41 |
-
"n_episodes": 50,
|
| 42 |
-
"reward_mean": 1.0806900000000002,
|
| 43 |
-
"reward_std": 0.19626869694375626,
|
| 44 |
-
"reward_min": 0.7499999999999999,
|
| 45 |
-
"reward_max": 1.3470000000000004,
|
| 46 |
-
"length_mean": 20.0,
|
| 47 |
-
"violations_mean": 0.0,
|
| 48 |
-
"invalid_action_picks_mean_per_ep": 14.86
|
| 49 |
-
}
|
| 50 |
-
},
|
| 51 |
-
"train_times_min": {
|
| 52 |
-
"MaskablePPO": 10.99298940896988,
|
| 53 |
-
"PPO": 8.347426931063334,
|
| 54 |
-
"A2C": 9.913969707489013,
|
| 55 |
-
"RecurrentPPO": 16.337928581237794
|
| 56 |
-
},
|
| 57 |
-
"maskable_vs_others": {
|
| 58 |
-
"PPO": {
|
| 59 |
-
"reward_delta": -0.25350000000000006,
|
| 60 |
-
"maskable_lift_pct": 26.768743400211196
|
| 61 |
-
},
|
| 62 |
-
"A2C": {
|
| 63 |
-
"reward_delta": -0.32663,
|
| 64 |
-
"maskable_lift_pct": 37.377413116367414
|
| 65 |
-
},
|
| 66 |
-
"RecurrentPPO": {
|
| 67 |
-
"reward_delta": -0.11980999999999997,
|
| 68 |
-
"maskable_lift_pct": 11.08643551804865
|
| 69 |
-
}
|
| 70 |
-
},
|
| 71 |
-
"elapsed_min": 45.86821995576223
|
| 72 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "easy_typhoon_response",
|
| 3 |
+
"training_timesteps": 100000,
|
| 4 |
+
"eval_episodes": 50,
|
| 5 |
+
"per_algorithm": {
|
| 6 |
+
"MaskablePPO": {
|
| 7 |
+
"algorithm": "MaskablePPO",
|
| 8 |
+
"n_episodes": 50,
|
| 9 |
+
"reward_mean": 1.2005000000000001,
|
| 10 |
+
"reward_std": 0.19939637032804786,
|
| 11 |
+
"reward_min": 0.643,
|
| 12 |
+
"reward_max": 1.3435000000000004,
|
| 13 |
+
"length_mean": 20.0,
|
| 14 |
+
"violations_mean": 0.0,
|
| 15 |
+
"invalid_action_picks_mean_per_ep": 0.0
|
| 16 |
+
},
|
| 17 |
+
"PPO": {
|
| 18 |
+
"algorithm": "PPO",
|
| 19 |
+
"n_episodes": 50,
|
| 20 |
+
"reward_mean": 0.9470000000000001,
|
| 21 |
+
"reward_std": 0.1244727781484771,
|
| 22 |
+
"reward_min": 0.5895,
|
| 23 |
+
"reward_max": 1.0760000000000003,
|
| 24 |
+
"length_mean": 20.0,
|
| 25 |
+
"violations_mean": 0.0,
|
| 26 |
+
"invalid_action_picks_mean_per_ep": 13.64
|
| 27 |
+
},
|
| 28 |
+
"A2C": {
|
| 29 |
+
"algorithm": "A2C",
|
| 30 |
+
"n_episodes": 50,
|
| 31 |
+
"reward_mean": 0.8738700000000001,
|
| 32 |
+
"reward_std": 0.11796597221232909,
|
| 33 |
+
"reward_min": 0.5359999999999999,
|
| 34 |
+
"reward_max": 0.9690000000000002,
|
| 35 |
+
"length_mean": 20.0,
|
| 36 |
+
"violations_mean": 0.0,
|
| 37 |
+
"invalid_action_picks_mean_per_ep": 13.88
|
| 38 |
+
},
|
| 39 |
+
"RecurrentPPO": {
|
| 40 |
+
"algorithm": "RecurrentPPO",
|
| 41 |
+
"n_episodes": 50,
|
| 42 |
+
"reward_mean": 1.0806900000000002,
|
| 43 |
+
"reward_std": 0.19626869694375626,
|
| 44 |
+
"reward_min": 0.7499999999999999,
|
| 45 |
+
"reward_max": 1.3470000000000004,
|
| 46 |
+
"length_mean": 20.0,
|
| 47 |
+
"violations_mean": 0.0,
|
| 48 |
+
"invalid_action_picks_mean_per_ep": 14.86
|
| 49 |
+
}
|
| 50 |
+
},
|
| 51 |
+
"train_times_min": {
|
| 52 |
+
"MaskablePPO": 10.99298940896988,
|
| 53 |
+
"PPO": 8.347426931063334,
|
| 54 |
+
"A2C": 9.913969707489013,
|
| 55 |
+
"RecurrentPPO": 16.337928581237794
|
| 56 |
+
},
|
| 57 |
+
"maskable_vs_others": {
|
| 58 |
+
"PPO": {
|
| 59 |
+
"reward_delta": -0.25350000000000006,
|
| 60 |
+
"maskable_lift_pct": 26.768743400211196
|
| 61 |
+
},
|
| 62 |
+
"A2C": {
|
| 63 |
+
"reward_delta": -0.32663,
|
| 64 |
+
"maskable_lift_pct": 37.377413116367414
|
| 65 |
+
},
|
| 66 |
+
"RecurrentPPO": {
|
| 67 |
+
"reward_delta": -0.11980999999999997,
|
| 68 |
+
"maskable_lift_pct": 11.08643551804865
|
| 69 |
+
}
|
| 70 |
+
},
|
| 71 |
+
"elapsed_min": 45.86821995576223
|
| 72 |
}
|
FINAL_SUBMIT/receipts/R6_AQUA_REGIA_V2.json
CHANGED
|
@@ -1,860 +1,860 @@
|
|
| 1 |
-
{
|
| 2 |
-
"targets": [
|
| 3 |
-
"DCOILWTICO",
|
| 4 |
-
"DEXJPUS",
|
| 5 |
-
"DEXUSEU",
|
| 6 |
-
"DEXCHUS",
|
| 7 |
-
"DEXKOUS"
|
| 8 |
-
],
|
| 9 |
-
"horizon": 14,
|
| 10 |
-
"confs": [
|
| 11 |
-
0.8,
|
| 12 |
-
0.9,
|
| 13 |
-
0.95
|
| 14 |
-
],
|
| 15 |
-
"n_cal": 30,
|
| 16 |
-
"n_test": 30,
|
| 17 |
-
"results": {
|
| 18 |
-
"DCOILWTICO": {
|
| 19 |
-
"arima": {
|
| 20 |
-
"forecaster": "arima",
|
| 21 |
-
"n_cal": 30,
|
| 22 |
-
"n_test": 30,
|
| 23 |
-
"conf=0.8": {
|
| 24 |
-
"nominal_coverage": 0.8,
|
| 25 |
-
"bare_coverage_mean": 0.8095238095238094,
|
| 26 |
-
"bare_width_mean": 10.867942261555571,
|
| 27 |
-
"perhorizon_coverage_mean": 0.6857142857142856,
|
| 28 |
-
"perhorizon_width_mean": 7.990994504643288,
|
| 29 |
-
"pooled_coverage_mean": 0.6785714285714285,
|
| 30 |
-
"pooled_width_mean": 8.029568159989491,
|
| 31 |
-
"q_per_horizon": [
|
| 32 |
-
2.0917427692512547,
|
| 33 |
-
2.414564146929898,
|
| 34 |
-
3.49864771255762,
|
| 35 |
-
3.783403014989574,
|
| 36 |
-
3.6514825270864293,
|
| 37 |
-
3.410638918826429,
|
| 38 |
-
3.6483267386695672,
|
| 39 |
-
4.291356370865486,
|
| 40 |
-
4.148100512774434,
|
| 41 |
-
4.765242660767733,
|
| 42 |
-
4.798738782538393,
|
| 43 |
-
4.648753353034714,
|
| 44 |
-
5.111777984600735,
|
| 45 |
-
5.674186039610767
|
| 46 |
-
],
|
| 47 |
-
"q_pooled": 4.014784079994747
|
| 48 |
-
},
|
| 49 |
-
"conf=0.9": {
|
| 50 |
-
"nominal_coverage": 0.9,
|
| 51 |
-
"bare_coverage_mean": 0.9214285714285715,
|
| 52 |
-
"bare_width_mean": 13.948852880392929,
|
| 53 |
-
"perhorizon_coverage_mean": 0.7809523809523811,
|
| 54 |
-
"perhorizon_width_mean": 10.031165041917506,
|
| 55 |
-
"pooled_coverage_mean": 0.7738095238095238,
|
| 56 |
-
"pooled_width_mean": 10.167074585069713,
|
| 57 |
-
"q_per_horizon": [
|
| 58 |
-
2.300277140003125,
|
| 59 |
-
4.097940221459595,
|
| 60 |
-
4.076376633492892,
|
| 61 |
-
4.703831136719856,
|
| 62 |
-
4.842398951063927,
|
| 63 |
-
5.337677242975467,
|
| 64 |
-
4.359396527417836,
|
| 65 |
-
6.151868291801264,
|
| 66 |
-
5.051950062063291,
|
| 67 |
-
5.854070590337393,
|
| 68 |
-
5.368481950759772,
|
| 69 |
-
5.284114635080698,
|
| 70 |
-
6.431339982770957,
|
| 71 |
-
6.3584319274764525
|
| 72 |
-
],
|
| 73 |
-
"q_pooled": 5.0835372925348565
|
| 74 |
-
},
|
| 75 |
-
"conf=0.95": {
|
| 76 |
-
"nominal_coverage": 0.95,
|
| 77 |
-
"bare_coverage_mean": 0.9452380952380951,
|
| 78 |
-
"bare_width_mean": 16.621083373775793,
|
| 79 |
-
"perhorizon_coverage_mean": 0.9261904761904761,
|
| 80 |
-
"perhorizon_width_mean": 14.611219531249459,
|
| 81 |
-
"pooled_coverage_mean": 0.838095238095238,
|
| 82 |
-
"pooled_width_mean": 12.16250013730463,
|
| 83 |
-
"q_per_horizon": [
|
| 84 |
-
3.0531114213612582,
|
| 85 |
-
5.059338828648023,
|
| 86 |
-
5.697604686526287,
|
| 87 |
-
7.146009479872129,
|
| 88 |
-
5.3182905673299175,
|
| 89 |
-
7.39090190741959,
|
| 90 |
-
6.856329650125417,
|
| 91 |
-
7.199424687832007,
|
| 92 |
-
6.523429069811058,
|
| 93 |
-
6.548845442730201,
|
| 94 |
-
9.62406528058468,
|
| 95 |
-
8.603787092463286,
|
| 96 |
-
11.553679176235391,
|
| 97 |
-
11.703719427806988
|
| 98 |
-
],
|
| 99 |
-
"q_pooled": 6.0812500686523165
|
| 100 |
-
}
|
| 101 |
-
},
|
| 102 |
-
"chronos": {
|
| 103 |
-
"forecaster": "chronos",
|
| 104 |
-
"n_cal": 30,
|
| 105 |
-
"n_test": 30,
|
| 106 |
-
"conf=0.8": {
|
| 107 |
-
"nominal_coverage": 0.8,
|
| 108 |
-
"bare_coverage_mean": 0.7809523809523807,
|
| 109 |
-
"bare_width_mean": 11.050525585810343,
|
| 110 |
-
"perhorizon_coverage_mean": 0.6547619047619048,
|
| 111 |
-
"perhorizon_width_mean": 8.338129283360074,
|
| 112 |
-
"pooled_coverage_mean": 0.6452380952380952,
|
| 113 |
-
"pooled_width_mean": 8.036834106445315,
|
| 114 |
-
"q_per_horizon": [
|
| 115 |
-
2.1229774475097685,
|
| 116 |
-
2.4522241210937494,
|
| 117 |
-
3.261205139160154,
|
| 118 |
-
3.9071347045898435,
|
| 119 |
-
3.614091110229495,
|
| 120 |
-
3.6567034912109406,
|
| 121 |
-
3.993652496337887,
|
| 122 |
-
4.4286404418945295,
|
| 123 |
-
4.545238494873047,
|
| 124 |
-
5.274034423828127,
|
| 125 |
-
5.24025115966797,
|
| 126 |
-
4.8420919799804665,
|
| 127 |
-
5.316376342773438,
|
| 128 |
-
5.71228363037109
|
| 129 |
-
],
|
| 130 |
-
"q_pooled": 4.018417053222656
|
| 131 |
-
},
|
| 132 |
-
"conf=0.9": {
|
| 133 |
-
"nominal_coverage": 0.9,
|
| 134 |
-
"bare_coverage_mean": 0.7809523809523807,
|
| 135 |
-
"bare_width_mean": 11.050525585810343,
|
| 136 |
-
"perhorizon_coverage_mean": 0.7880952380952381,
|
| 137 |
-
"perhorizon_width_mean": 11.069673222133089,
|
| 138 |
-
"pooled_coverage_mean": 0.769047619047619,
|
| 139 |
-
"pooled_width_mean": 10.63275268554687,
|
| 140 |
-
"q_per_horizon": [
|
| 141 |
-
2.555929565429693,
|
| 142 |
-
3.5912300109863295,
|
| 143 |
-
4.3903402709960915,
|
| 144 |
-
5.24416809082031,
|
| 145 |
-
4.982480926513674,
|
| 146 |
-
5.137361450195314,
|
| 147 |
-
5.586841278076172,
|
| 148 |
-
6.765305328369138,
|
| 149 |
-
6.67245574951172,
|
| 150 |
-
5.990972595214842,
|
| 151 |
-
5.718290405273436,
|
| 152 |
-
5.943902282714845,
|
| 153 |
-
7.989523162841799,
|
| 154 |
-
6.918911437988278
|
| 155 |
-
],
|
| 156 |
-
"q_pooled": 5.316376342773438
|
| 157 |
-
},
|
| 158 |
-
"conf=0.95": {
|
| 159 |
-
"nominal_coverage": 0.95,
|
| 160 |
-
"bare_coverage_mean": 0.7809523809523807,
|
| 161 |
-
"bare_width_mean": 11.050525585810343,
|
| 162 |
-
"perhorizon_coverage_mean": 0.9261904761904761,
|
| 163 |
-
"perhorizon_width_mean": 16.372548740931915,
|
| 164 |
-
"pooled_coverage_mean": 0.8547619047619047,
|
| 165 |
-
"pooled_width_mean": 13.761851806640617,
|
| 166 |
-
"q_per_horizon": [
|
| 167 |
-
4.500623779296873,
|
| 168 |
-
5.796702575683597,
|
| 169 |
-
4.578687438964849,
|
| 170 |
-
5.983569641113277,
|
| 171 |
-
7.369260253906248,
|
| 172 |
-
8.649095764160151,
|
| 173 |
-
8.18119262695312,
|
| 174 |
-
9.151351928710938,
|
| 175 |
-
8.256888427734381,
|
| 176 |
-
8.666538696289066,
|
| 177 |
-
10.109675750732421,
|
| 178 |
-
9.065566864013675,
|
| 179 |
-
12.079234161376952,
|
| 180 |
-
12.219453277587888
|
| 181 |
-
],
|
| 182 |
-
"q_pooled": 6.8809259033203105
|
| 183 |
-
}
|
| 184 |
-
}
|
| 185 |
-
},
|
| 186 |
-
"DEXJPUS": {
|
| 187 |
-
"arima": {
|
| 188 |
-
"forecaster": "arima",
|
| 189 |
-
"n_cal": 30,
|
| 190 |
-
"n_test": 30,
|
| 191 |
-
"conf=0.8": {
|
| 192 |
-
"nominal_coverage": 0.8,
|
| 193 |
-
"bare_coverage_mean": 0.6357142857142856,
|
| 194 |
-
"bare_width_mean": 4.436568793595841,
|
| 195 |
-
"perhorizon_coverage_mean": 0.45238095238095233,
|
| 196 |
-
"perhorizon_width_mean": 2.8685092642157013,
|
| 197 |
-
"pooled_coverage_mean": 0.4928571428571428,
|
| 198 |
-
"pooled_width_mean": 2.791173769264077,
|
| 199 |
-
"q_per_horizon": [
|
| 200 |
-
0.495163456754355,
|
| 201 |
-
0.8623131555344372,
|
| 202 |
-
0.8897926642558076,
|
| 203 |
-
1.1482011742546945,
|
| 204 |
-
1.28795516679331,
|
| 205 |
-
1.6477655987067266,
|
| 206 |
-
1.7443474583408118,
|
| 207 |
-
1.5384895904415004,
|
| 208 |
-
1.803162688834604,
|
| 209 |
-
1.7685075068830685,
|
| 210 |
-
1.7186420091775432,
|
| 211 |
-
1.5470661555772267,
|
| 212 |
-
1.888659928991629,
|
| 213 |
-
1.7394982949641928
|
| 214 |
-
],
|
| 215 |
-
"q_pooled": 1.3955868846320385
|
| 216 |
-
},
|
| 217 |
-
"conf=0.9": {
|
| 218 |
-
"nominal_coverage": 0.9,
|
| 219 |
-
"bare_coverage_mean": 0.7738095238095236,
|
| 220 |
-
"bare_width_mean": 5.694274399535953,
|
| 221 |
-
"perhorizon_coverage_mean": 0.5761904761904761,
|
| 222 |
-
"perhorizon_width_mean": 3.798189452444865,
|
| 223 |
-
"pooled_coverage_mean": 0.5809523809523809,
|
| 224 |
-
"pooled_width_mean": 3.8189608293080823,
|
| 225 |
-
"q_per_horizon": [
|
| 226 |
-
0.602618663621783,
|
| 227 |
-
1.5464872564533323,
|
| 228 |
-
1.410577522130609,
|
| 229 |
-
2.006457013067674,
|
| 230 |
-
1.9326982798289691,
|
| 231 |
-
1.871741039728505,
|
| 232 |
-
1.8724724170933484,
|
| 233 |
-
2.0184353738183205,
|
| 234 |
-
2.057205707305812,
|
| 235 |
-
2.300998677577681,
|
| 236 |
-
2.4584763121956854,
|
| 237 |
-
2.2610349692604643,
|
| 238 |
-
2.141044083930069,
|
| 239 |
-
2.1070788511018037
|
| 240 |
-
],
|
| 241 |
-
"q_pooled": 1.9094804146540412
|
| 242 |
-
},
|
| 243 |
-
"conf=0.95": {
|
| 244 |
-
"nominal_coverage": 0.95,
|
| 245 |
-
"bare_coverage_mean": 0.8738095238095237,
|
| 246 |
-
"bare_width_mean": 6.7851464460479765,
|
| 247 |
-
"perhorizon_coverage_mean": 0.8023809523809523,
|
| 248 |
-
"perhorizon_width_mean": 6.101635459825262,
|
| 249 |
-
"pooled_coverage_mean": 0.6571428571428571,
|
| 250 |
-
"pooled_width_mean": 4.601997355155362,
|
| 251 |
-
"q_per_horizon": [
|
| 252 |
-
0.9380858484970958,
|
| 253 |
-
2.323515167056655,
|
| 254 |
-
1.946219636173069,
|
| 255 |
-
2.2116051075864647,
|
| 256 |
-
2.7206754280723686,
|
| 257 |
-
3.562227529556367,
|
| 258 |
-
3.502961358052417,
|
| 259 |
-
3.5922479170316564,
|
| 260 |
-
4.142317883234554,
|
| 261 |
-
4.062380770386838,
|
| 262 |
-
3.5722844723094056,
|
| 263 |
-
3.2623018774721544,
|
| 264 |
-
3.212317495709044,
|
| 265 |
-
3.6623077276387335
|
| 266 |
-
],
|
| 267 |
-
"q_pooled": 2.300998677577681
|
| 268 |
-
}
|
| 269 |
-
},
|
| 270 |
-
"chronos": {
|
| 271 |
-
"forecaster": "chronos",
|
| 272 |
-
"n_cal": 30,
|
| 273 |
-
"n_test": 30,
|
| 274 |
-
"conf=0.8": {
|
| 275 |
-
"nominal_coverage": 0.8,
|
| 276 |
-
"bare_coverage_mean": 0.7309523809523808,
|
| 277 |
-
"bare_width_mean": 5.977349718411763,
|
| 278 |
-
"perhorizon_coverage_mean": 0.47380952380952385,
|
| 279 |
-
"perhorizon_width_mean": 3.038026166643411,
|
| 280 |
-
"pooled_coverage_mean": 0.49761904761904757,
|
| 281 |
-
"pooled_width_mean": 2.8918725585937466,
|
| 282 |
-
"q_per_horizon": [
|
| 283 |
-
0.5868325805664085,
|
| 284 |
-
0.8268566894531233,
|
| 285 |
-
0.8645288085937466,
|
| 286 |
-
1.1490182495117125,
|
| 287 |
-
1.4187112426757835,
|
| 288 |
-
1.667842102050784,
|
| 289 |
-
1.8516342163085966,
|
| 290 |
-
1.6831582641601557,
|
| 291 |
-
1.5933966064453102,
|
| 292 |
-
1.7942288208007824,
|
| 293 |
-
2.1771484374999943,
|
| 294 |
-
1.8165200805664057,
|
| 295 |
-
1.8638430786132858,
|
| 296 |
-
1.9724639892578182
|
| 297 |
-
],
|
| 298 |
-
"q_pooled": 1.4459362792968733
|
| 299 |
-
},
|
| 300 |
-
"conf=0.9": {
|
| 301 |
-
"nominal_coverage": 0.9,
|
| 302 |
-
"bare_coverage_mean": 0.7309523809523808,
|
| 303 |
-
"bare_width_mean": 5.977349718411763,
|
| 304 |
-
"perhorizon_coverage_mean": 0.6071428571428572,
|
| 305 |
-
"perhorizon_width_mean": 4.111253226143984,
|
| 306 |
-
"pooled_coverage_mean": 0.6023809523809524,
|
| 307 |
-
"pooled_width_mean": 4.0517645263671795,
|
| 308 |
-
"q_per_horizon": [
|
| 309 |
-
0.7398001098632818,
|
| 310 |
-
1.542530517578129,
|
| 311 |
-
1.4136145019531199,
|
| 312 |
-
2.0581530761718767,
|
| 313 |
-
1.8112579345703068,
|
| 314 |
-
2.3215438842773466,
|
| 315 |
-
2.0993005371093716,
|
| 316 |
-
2.064953918457036,
|
| 317 |
-
2.4423132324218813,
|
| 318 |
-
2.698671264648439,
|
| 319 |
-
2.4562600708007807,
|
| 320 |
-
2.32724975585937,
|
| 321 |
-
2.5256872558593813,
|
| 322 |
-
2.277436523437501
|
| 323 |
-
],
|
| 324 |
-
"q_pooled": 2.0258822631835898
|
| 325 |
-
},
|
| 326 |
-
"conf=0.95": {
|
| 327 |
-
"nominal_coverage": 0.95,
|
| 328 |
-
"bare_coverage_mean": 0.7309523809523808,
|
| 329 |
-
"bare_width_mean": 5.977349718411763,
|
| 330 |
-
"perhorizon_coverage_mean": 0.7190476190476188,
|
| 331 |
-
"perhorizon_width_mean": 5.96463936941964,
|
| 332 |
-
"pooled_coverage_mean": 0.6809523809523809,
|
| 333 |
-
"pooled_width_mean": 5.0513745117187625,
|
| 334 |
-
"q_per_horizon": [
|
| 335 |
-
0.930439453125004,
|
| 336 |
-
2.665478515624997,
|
| 337 |
-
1.9302044677734358,
|
| 338 |
-
2.0884591674804653,
|
| 339 |
-
2.7411437988281193,
|
| 340 |
-
3.6284613037109352,
|
| 341 |
-
3.513445739746089,
|
| 342 |
-
3.5274569702148426,
|
| 343 |
-
4.001575012207027,
|
| 344 |
-
3.9003729248046852,
|
| 345 |
-
3.2779876708984403,
|
| 346 |
-
3.0333639526367193,
|
| 347 |
-
3.0030249023437534,
|
| 348 |
-
3.511061706542975
|
| 349 |
-
],
|
| 350 |
-
"q_pooled": 2.5256872558593813
|
| 351 |
-
}
|
| 352 |
-
}
|
| 353 |
-
},
|
| 354 |
-
"DEXUSEU": {
|
| 355 |
-
"arima": {
|
| 356 |
-
"forecaster": "arima",
|
| 357 |
-
"n_cal": 30,
|
| 358 |
-
"n_test": 30,
|
| 359 |
-
"conf=0.8": {
|
| 360 |
-
"nominal_coverage": 0.8,
|
| 361 |
-
"bare_coverage_mean": 0.8595238095238095,
|
| 362 |
-
"bare_width_mean": 0.037255051394705835,
|
| 363 |
-
"perhorizon_coverage_mean": 0.811904761904762,
|
| 364 |
-
"perhorizon_width_mean": 0.03243267317446737,
|
| 365 |
-
"pooled_coverage_mean": 0.8166666666666665,
|
| 366 |
-
"pooled_width_mean": 0.031645107249388627,
|
| 367 |
-
"q_per_horizon": [
|
| 368 |
-
0.006537154478817753,
|
| 369 |
-
0.007333177556922088,
|
| 370 |
-
0.012312774872748289,
|
| 371 |
-
0.014043924961390397,
|
| 372 |
-
0.016017799097016727,
|
| 373 |
-
0.015644421534730224,
|
| 374 |
-
0.016336252170641608,
|
| 375 |
-
0.016122979608933496,
|
| 376 |
-
0.01964457489050009,
|
| 377 |
-
0.02072169154979453,
|
| 378 |
-
0.024118006869554565,
|
| 379 |
-
0.018656617879449167,
|
| 380 |
-
0.017769218599013037,
|
| 381 |
-
0.021770118151759554
|
| 382 |
-
],
|
| 383 |
-
"q_pooled": 0.015822553624694313
|
| 384 |
-
},
|
| 385 |
-
"conf=0.9": {
|
| 386 |
-
"nominal_coverage": 0.9,
|
| 387 |
-
"bare_coverage_mean": 0.9142857142857144,
|
| 388 |
-
"bare_width_mean": 0.047816340798432555,
|
| 389 |
-
"perhorizon_coverage_mean": 0.8904761904761905,
|
| 390 |
-
"perhorizon_width_mean": 0.04285578362084427,
|
| 391 |
-
"pooled_coverage_mean": 0.8809523809523809,
|
| 392 |
-
"pooled_width_mean": 0.041073044538626924,
|
| 393 |
-
"q_per_horizon": [
|
| 394 |
-
0.006761841674864266,
|
| 395 |
-
0.01182171512244512,
|
| 396 |
-
0.015822553624694313,
|
| 397 |
-
0.02093465874643763,
|
| 398 |
-
0.019889187414578124,
|
| 399 |
-
0.01963882946285489,
|
| 400 |
-
0.02190089656490879,
|
| 401 |
-
0.021692702530445862,
|
| 402 |
-
0.024590684771490512,
|
| 403 |
-
0.024756601121440625,
|
| 404 |
-
0.02609594060524123,
|
| 405 |
-
0.02889462135779275,
|
| 406 |
-
0.02689529861576956,
|
| 407 |
-
0.030294953732946217
|
| 408 |
-
],
|
| 409 |
-
"q_pooled": 0.020536522269313462
|
| 410 |
-
},
|
| 411 |
-
"conf=0.95": {
|
| 412 |
-
"nominal_coverage": 0.95,
|
| 413 |
-
"bare_coverage_mean": 0.9380952380952381,
|
| 414 |
-
"bare_width_mean": 0.05697668430905675,
|
| 415 |
-
"perhorizon_coverage_mean": 0.9404761904761906,
|
| 416 |
-
"perhorizon_width_mean": 0.05919364307194989,
|
| 417 |
-
"pooled_coverage_mean": 0.9119047619047618,
|
| 418 |
-
"pooled_width_mean": 0.05176715217769701,
|
| 419 |
-
"q_per_horizon": [
|
| 420 |
-
0.011752772972313252,
|
| 421 |
-
0.01247253748338717,
|
| 422 |
-
0.01748801536532918,
|
| 423 |
-
0.02383577073487353,
|
| 424 |
-
0.02364315675893547,
|
| 425 |
-
0.02218707632552186,
|
| 426 |
-
0.03203504055001494,
|
| 427 |
-
0.030332454296178923,
|
| 428 |
-
0.03750274950896193,
|
| 429 |
-
0.03613221732608629,
|
| 430 |
-
0.039232376756770826,
|
| 431 |
-
0.04010448928765342,
|
| 432 |
-
0.04080440634480942,
|
| 433 |
-
0.046832437792812875
|
| 434 |
-
],
|
| 435 |
-
"q_pooled": 0.025883576088848503
|
| 436 |
-
}
|
| 437 |
-
},
|
| 438 |
-
"chronos": {
|
| 439 |
-
"forecaster": "chronos",
|
| 440 |
-
"n_cal": 30,
|
| 441 |
-
"n_test": 30,
|
| 442 |
-
"conf=0.8": {
|
| 443 |
-
"nominal_coverage": 0.8,
|
| 444 |
-
"bare_coverage_mean": 0.8,
|
| 445 |
-
"bare_width_mean": 0.03301220412055651,
|
| 446 |
-
"perhorizon_coverage_mean": 0.8071428571428574,
|
| 447 |
-
"perhorizon_width_mean": 0.03432217042105538,
|
| 448 |
-
"pooled_coverage_mean": 0.8000000000000002,
|
| 449 |
-
"pooled_width_mean": 0.03300358161926287,
|
| 450 |
-
"q_per_horizon": [
|
| 451 |
-
0.004584144783019939,
|
| 452 |
-
0.007060681152343706,
|
| 453 |
-
0.01243185882568354,
|
| 454 |
-
0.01602103652954101,
|
| 455 |
-
0.01641003990173351,
|
| 456 |
-
0.015545682907104563,
|
| 457 |
-
0.018368010711669935,
|
| 458 |
-
0.01898662319183342,
|
| 459 |
-
0.022148969459533596,
|
| 460 |
-
0.02255078582763681,
|
| 461 |
-
0.023978458976745554,
|
| 462 |
-
0.020319693946838413,
|
| 463 |
-
0.017313012123107985,
|
| 464 |
-
0.024536194610595752
|
| 465 |
-
],
|
| 466 |
-
"q_pooled": 0.016501790809631434
|
| 467 |
-
},
|
| 468 |
-
"conf=0.9": {
|
| 469 |
-
"nominal_coverage": 0.9,
|
| 470 |
-
"bare_coverage_mean": 0.8,
|
| 471 |
-
"bare_width_mean": 0.03301220412055651,
|
| 472 |
-
"perhorizon_coverage_mean": 0.9190476190476191,
|
| 473 |
-
"perhorizon_width_mean": 0.05077633157457622,
|
| 474 |
-
"pooled_coverage_mean": 0.8904761904761905,
|
| 475 |
-
"pooled_width_mean": 0.04548504829406719,
|
| 476 |
-
"q_per_horizon": [
|
| 477 |
-
0.008554865837097081,
|
| 478 |
-
0.00971177463531503,
|
| 479 |
-
0.01530143814086915,
|
| 480 |
-
0.01911055355072011,
|
| 481 |
-
0.01780367832183849,
|
| 482 |
-
0.021554478836059543,
|
| 483 |
-
0.026538812255859412,
|
| 484 |
-
0.027544754409789984,
|
| 485 |
-
0.028936708450317372,
|
| 486 |
-
0.03478273067474369,
|
| 487 |
-
0.0382537099838256,
|
| 488 |
-
0.03136329650878911,
|
| 489 |
-
0.0327265468597413,
|
| 490 |
-
0.04325097255706778
|
| 491 |
-
],
|
| 492 |
-
"q_pooled": 0.022742524147033594
|
| 493 |
-
},
|
| 494 |
-
"conf=0.95": {
|
| 495 |
-
"nominal_coverage": 0.95,
|
| 496 |
-
"bare_coverage_mean": 0.8,
|
| 497 |
-
"bare_width_mean": 0.03301220412055651,
|
| 498 |
-
"perhorizon_coverage_mean": 0.9404761904761905,
|
| 499 |
-
"perhorizon_width_mean": 0.0633313385554722,
|
| 500 |
-
"pooled_coverage_mean": 0.9547619047619046,
|
| 501 |
-
"pooled_width_mean": 0.06135401725769052,
|
| 502 |
-
"q_per_horizon": [
|
| 503 |
-
0.011944815063476666,
|
| 504 |
-
0.01392391796112058,
|
| 505 |
-
0.017532272148132355,
|
| 506 |
-
0.022742524147033594,
|
| 507 |
-
0.02558988399505613,
|
| 508 |
-
0.02623647480010982,
|
| 509 |
-
0.03067700862884526,
|
| 510 |
-
0.034072942352294966,
|
| 511 |
-
0.04179227085113535,
|
| 512 |
-
0.0389519283294677,
|
| 513 |
-
0.042779201126098565,
|
| 514 |
-
0.04429976444244388,
|
| 515 |
-
0.044917986869811966,
|
| 516 |
-
0.04785837917327873
|
| 517 |
-
],
|
| 518 |
-
"q_pooled": 0.03067700862884526
|
| 519 |
-
}
|
| 520 |
-
}
|
| 521 |
-
},
|
| 522 |
-
"DEXCHUS": {
|
| 523 |
-
"arima": {
|
| 524 |
-
"forecaster": "arima",
|
| 525 |
-
"n_cal": 30,
|
| 526 |
-
"n_test": 30,
|
| 527 |
-
"conf=0.8": {
|
| 528 |
-
"nominal_coverage": 0.8,
|
| 529 |
-
"bare_coverage_mean": 0.8309523809523809,
|
| 530 |
-
"bare_width_mean": 0.12023258914287749,
|
| 531 |
-
"perhorizon_coverage_mean": 0.8,
|
| 532 |
-
"perhorizon_width_mean": 0.10379373004234645,
|
| 533 |
-
"pooled_coverage_mean": 0.7833333333333333,
|
| 534 |
-
"pooled_width_mean": 0.0905579673492376,
|
| 535 |
-
"q_per_horizon": [
|
| 536 |
-
0.01913552539082275,
|
| 537 |
-
0.021503803498270635,
|
| 538 |
-
0.03202273363733443,
|
| 539 |
-
0.04471228016293516,
|
| 540 |
-
0.04595743067166769,
|
| 541 |
-
0.057142529866381686,
|
| 542 |
-
0.041567074905930035,
|
| 543 |
-
0.05922440211999547,
|
| 544 |
-
0.06055238630005544,
|
| 545 |
-
0.06195863987337091,
|
| 546 |
-
0.07735612435271388,
|
| 547 |
-
0.07482211423245033,
|
| 548 |
-
0.0613510301071134,
|
| 549 |
-
0.06925003517738304
|
| 550 |
-
],
|
| 551 |
-
"q_pooled": 0.0452789836746188
|
| 552 |
-
},
|
| 553 |
-
"conf=0.9": {
|
| 554 |
-
"nominal_coverage": 0.9,
|
| 555 |
-
"bare_coverage_mean": 0.8761904761904763,
|
| 556 |
-
"bare_width_mean": 0.1543168575080998,
|
| 557 |
-
"perhorizon_coverage_mean": 0.8857142857142858,
|
| 558 |
-
"perhorizon_width_mean": 0.1694623051285068,
|
| 559 |
-
"pooled_coverage_mean": 0.8833333333333333,
|
| 560 |
-
"pooled_width_mean": 0.14964422846490066,
|
| 561 |
-
"q_per_horizon": [
|
| 562 |
-
0.026065770883445083,
|
| 563 |
-
0.03663070092160048,
|
| 564 |
-
0.04814005922096687,
|
| 565 |
-
0.05434837199719045,
|
| 566 |
-
0.06341843160370875,
|
| 567 |
-
0.06742875148755179,
|
| 568 |
-
0.08909509445192665,
|
| 569 |
-
0.09169474000207156,
|
| 570 |
-
0.11607218346504666,
|
| 571 |
-
0.12686121412365825,
|
| 572 |
-
0.11025109977698122,
|
| 573 |
-
0.12555183014476246,
|
| 574 |
-
0.11555182580724122,
|
| 575 |
-
0.11512606201339626
|
| 576 |
-
],
|
| 577 |
-
"q_pooled": 0.07482211423245033
|
| 578 |
-
},
|
| 579 |
-
"conf=0.95": {
|
| 580 |
-
"nominal_coverage": 0.95,
|
| 581 |
-
"bare_coverage_mean": 0.9142857142857144,
|
| 582 |
-
"bare_width_mean": 0.18387987719237844,
|
| 583 |
-
"perhorizon_coverage_mean": 0.9523809523809524,
|
| 584 |
-
"perhorizon_width_mean": 0.2451580685008066,
|
| 585 |
-
"pooled_coverage_mean": 0.9285714285714286,
|
| 586 |
-
"pooled_width_mean": 0.22228302327474836,
|
| 587 |
-
"q_per_horizon": [
|
| 588 |
-
0.032681838125458995,
|
| 589 |
-
0.07173662444320072,
|
| 590 |
-
0.06519382424998543,
|
| 591 |
-
0.06079908928748701,
|
| 592 |
-
0.09872806564422376,
|
| 593 |
-
0.10867467864500302,
|
| 594 |
-
0.11114151163737418,
|
| 595 |
-
0.14390234892072673,
|
| 596 |
-
0.14109477023066574,
|
| 597 |
-
0.1721305319733375,
|
| 598 |
-
0.17782669739203882,
|
| 599 |
-
0.18559857212707964,
|
| 600 |
-
0.17849914242157627,
|
| 601 |
-
0.16809878440748793
|
| 602 |
-
],
|
| 603 |
-
"q_pooled": 0.11114151163737418
|
| 604 |
-
}
|
| 605 |
-
},
|
| 606 |
-
"chronos": {
|
| 607 |
-
"forecaster": "chronos",
|
| 608 |
-
"n_cal": 30,
|
| 609 |
-
"n_test": 30,
|
| 610 |
-
"conf=0.8": {
|
| 611 |
-
"nominal_coverage": 0.8,
|
| 612 |
-
"bare_coverage_mean": 0.8428571428571429,
|
| 613 |
-
"bare_width_mean": 0.11959348532060782,
|
| 614 |
-
"perhorizon_coverage_mean": 0.7833333333333333,
|
| 615 |
-
"perhorizon_width_mean": 0.10019261191231878,
|
| 616 |
-
"pooled_coverage_mean": 0.8,
|
| 617 |
-
"pooled_width_mean": 0.09779591979980395,
|
| 618 |
-
"q_per_horizon": [
|
| 619 |
-
0.025188607788085626,
|
| 620 |
-
0.02532754745483423,
|
| 621 |
-
0.03890764770507804,
|
| 622 |
-
0.043802440643310625,
|
| 623 |
-
0.04915690460205102,
|
| 624 |
-
0.04680775070190446,
|
| 625 |
-
0.03916668243408239,
|
| 626 |
-
0.04809946746826199,
|
| 627 |
-
0.0576093139648437,
|
| 628 |
-
0.06108116531372065,
|
| 629 |
-
0.05864996337890638,
|
| 630 |
-
0.06179137878417951,
|
| 631 |
-
0.0701272941589357,
|
| 632 |
-
0.0756321189880369
|
| 633 |
-
],
|
| 634 |
-
"q_pooled": 0.04889795989990198
|
| 635 |
-
},
|
| 636 |
-
"conf=0.9": {
|
| 637 |
-
"nominal_coverage": 0.9,
|
| 638 |
-
"bare_coverage_mean": 0.8428571428571429,
|
| 639 |
-
"bare_width_mean": 0.11959348532060782,
|
| 640 |
-
"perhorizon_coverage_mean": 0.869047619047619,
|
| 641 |
-
"perhorizon_width_mean": 0.16607914559500545,
|
| 642 |
-
"pooled_coverage_mean": 0.861904761904762,
|
| 643 |
-
"pooled_width_mean": 0.1402545883178714,
|
| 644 |
-
"q_per_horizon": [
|
| 645 |
-
0.030081840515136626,
|
| 646 |
-
0.04935519256591814,
|
| 647 |
-
0.046391881561278936,
|
| 648 |
-
0.050782734680176134,
|
| 649 |
-
0.06024611434936489,
|
| 650 |
-
0.06782592163085965,
|
| 651 |
-
0.08113353042602522,
|
| 652 |
-
0.09840077590942364,
|
| 653 |
-
0.11880251922607421,
|
| 654 |
-
0.12758038635253932,
|
| 655 |
-
0.10697886581420857,
|
| 656 |
-
0.12221163177490268,
|
| 657 |
-
0.10586601409912078,
|
| 658 |
-
0.09689661026000973
|
| 659 |
-
],
|
| 660 |
-
"q_pooled": 0.0701272941589357
|
| 661 |
-
},
|
| 662 |
-
"conf=0.95": {
|
| 663 |
-
"nominal_coverage": 0.95,
|
| 664 |
-
"bare_coverage_mean": 0.8428571428571429,
|
| 665 |
-
"bare_width_mean": 0.11959348532060782,
|
| 666 |
-
"perhorizon_coverage_mean": 0.9214285714285714,
|
| 667 |
-
"perhorizon_width_mean": 0.22292400338309162,
|
| 668 |
-
"pooled_coverage_mean": 0.9095238095238095,
|
| 669 |
-
"pooled_width_mean": 0.2085365203857421,
|
| 670 |
-
"q_per_horizon": [
|
| 671 |
-
0.03159678268432575,
|
| 672 |
-
0.07481312255859418,
|
| 673 |
-
0.07034568023681675,
|
| 674 |
-
0.05222851562499997,
|
| 675 |
-
0.070854161071777,
|
| 676 |
-
0.09303555068969693,
|
| 677 |
-
0.08751402359008775,
|
| 678 |
-
0.13737474822998053,
|
| 679 |
-
0.1317485343933109,
|
| 680 |
-
0.15814713668823277,
|
| 681 |
-
0.1641494514465336,
|
| 682 |
-
0.1720175582885739,
|
| 683 |
-
0.16296061859130884,
|
| 684 |
-
0.15368213958740196
|
| 685 |
-
],
|
| 686 |
-
"q_pooled": 0.10426826019287105
|
| 687 |
-
}
|
| 688 |
-
}
|
| 689 |
-
},
|
| 690 |
-
"DEXKOUS": {
|
| 691 |
-
"arima": {
|
| 692 |
-
"forecaster": "arima",
|
| 693 |
-
"n_cal": 30,
|
| 694 |
-
"n_test": 30,
|
| 695 |
-
"conf=0.8": {
|
| 696 |
-
"nominal_coverage": 0.8,
|
| 697 |
-
"bare_coverage_mean": 0.7071428571428572,
|
| 698 |
-
"bare_width_mean": 41.40702231782995,
|
| 699 |
-
"perhorizon_coverage_mean": 0.6809523809523808,
|
| 700 |
-
"perhorizon_width_mean": 40.33834903476961,
|
| 701 |
-
"pooled_coverage_mean": 0.738095238095238,
|
| 702 |
-
"pooled_width_mean": 40.174430225697506,
|
| 703 |
-
"q_per_horizon": [
|
| 704 |
-
6.019828757339383,
|
| 705 |
-
9.23651622262787,
|
| 706 |
-
11.885457212575375,
|
| 707 |
-
14.301239776206785,
|
| 708 |
-
16.538830978627857,
|
| 709 |
-
21.11794087612452,
|
| 710 |
-
21.007107424806236,
|
| 711 |
-
22.089443667480282,
|
| 712 |
-
22.26134568228099,
|
| 713 |
-
25.115703414253176,
|
| 714 |
-
26.282158971560648,
|
| 715 |
-
28.31230917980338,
|
| 716 |
-
28.622331265376488,
|
| 717 |
-
29.57822981432423
|
| 718 |
-
],
|
| 719 |
-
"q_pooled": 20.087215112848753
|
| 720 |
-
},
|
| 721 |
-
"conf=0.9": {
|
| 722 |
-
"nominal_coverage": 0.9,
|
| 723 |
-
"bare_coverage_mean": 0.8023809523809522,
|
| 724 |
-
"bare_width_mean": 53.145337785764546,
|
| 725 |
-
"perhorizon_coverage_mean": 0.7476190476190475,
|
| 726 |
-
"perhorizon_width_mean": 47.514067959856646,
|
| 727 |
-
"pooled_coverage_mean": 0.8166666666666665,
|
| 728 |
-
"pooled_width_mean": 51.703697664495394,
|
| 729 |
-
"q_per_horizon": [
|
| 730 |
-
7.042854649616629,
|
| 731 |
-
11.217728114270585,
|
| 732 |
-
13.051289508962782,
|
| 733 |
-
17.974908318198914,
|
| 734 |
-
22.696578397519033,
|
| 735 |
-
24.786648186653792,
|
| 736 |
-
23.205692899009136,
|
| 737 |
-
25.439228843483306,
|
| 738 |
-
28.745883742858496,
|
| 739 |
-
27.649073917800933,
|
| 740 |
-
32.25531441260455,
|
| 741 |
-
33.39915882237847,
|
| 742 |
-
32.317174372199815,
|
| 743 |
-
32.81694153344006
|
| 744 |
-
],
|
| 745 |
-
"q_pooled": 25.851848832247697
|
| 746 |
-
},
|
| 747 |
-
"conf=0.95": {
|
| 748 |
-
"nominal_coverage": 0.95,
|
| 749 |
-
"bare_coverage_mean": 0.8952380952380953,
|
| 750 |
-
"bare_width_mean": 63.326575872509096,
|
| 751 |
-
"perhorizon_coverage_mean": 0.8833333333333332,
|
| 752 |
-
"perhorizon_width_mean": 62.3317263081943,
|
| 753 |
-
"pooled_coverage_mean": 0.861904761904762,
|
| 754 |
-
"pooled_width_mean": 63.003314010262784,
|
| 755 |
-
"q_per_horizon": [
|
| 756 |
-
12.416104342710696,
|
| 757 |
-
13.332090802595758,
|
| 758 |
-
20.658854986845654,
|
| 759 |
-
37.144614564726226,
|
| 760 |
-
31.230195571947434,
|
| 761 |
-
31.501657005131392,
|
| 762 |
-
31.466225645210898,
|
| 763 |
-
32.67178752649829,
|
| 764 |
-
41.05990019882688,
|
| 765 |
-
37.85425421989498,
|
| 766 |
-
37.08859079038166,
|
| 767 |
-
35.26046070337611,
|
| 768 |
-
40.538744747242845,
|
| 769 |
-
34.098603051971395
|
| 770 |
-
],
|
| 771 |
-
"q_pooled": 31.501657005131392
|
| 772 |
-
}
|
| 773 |
-
},
|
| 774 |
-
"chronos": {
|
| 775 |
-
"forecaster": "chronos",
|
| 776 |
-
"n_cal": 30,
|
| 777 |
-
"n_test": 30,
|
| 778 |
-
"conf=0.8": {
|
| 779 |
-
"nominal_coverage": 0.8,
|
| 780 |
-
"bare_coverage_mean": 0.7476190476190475,
|
| 781 |
-
"bare_width_mean": 47.698866081237796,
|
| 782 |
-
"perhorizon_coverage_mean": 0.669047619047619,
|
| 783 |
-
"perhorizon_width_mean": 42.05718540736606,
|
| 784 |
-
"pooled_coverage_mean": 0.7452380952380951,
|
| 785 |
-
"pooled_width_mean": 43.94189453125,
|
| 786 |
-
"q_per_horizon": [
|
| 787 |
-
6.6086572265624,
|
| 788 |
-
8.688681640624964,
|
| 789 |
-
11.395966796874973,
|
| 790 |
-
12.880576171874964,
|
| 791 |
-
17.0732275390626,
|
| 792 |
-
19.5968017578125,
|
| 793 |
-
19.40576171875,
|
| 794 |
-
24.150083007812555,
|
| 795 |
-
24.586870117187573,
|
| 796 |
-
26.251137695312536,
|
| 797 |
-
27.594218749999982,
|
| 798 |
-
32.349785156249936,
|
| 799 |
-
31.7150732421876,
|
| 800 |
-
32.103457031249945
|
| 801 |
-
],
|
| 802 |
-
"q_pooled": 21.970947265625
|
| 803 |
-
},
|
| 804 |
-
"conf=0.9": {
|
| 805 |
-
"nominal_coverage": 0.9,
|
| 806 |
-
"bare_coverage_mean": 0.7476190476190475,
|
| 807 |
-
"bare_width_mean": 47.698866081237796,
|
| 808 |
-
"perhorizon_coverage_mean": 0.7714285714285712,
|
| 809 |
-
"perhorizon_width_mean": 49.80674665178569,
|
| 810 |
-
"pooled_coverage_mean": 0.8357142857142856,
|
| 811 |
-
"pooled_width_mean": 56.23533203124998,
|
| 812 |
-
"q_per_horizon": [
|
| 813 |
-
8.360268554687536,
|
| 814 |
-
12.467915039062518,
|
| 815 |
-
14.159082031249909,
|
| 816 |
-
18.2329248046874,
|
| 817 |
-
23.688662109374945,
|
| 818 |
-
25.474423828125055,
|
| 819 |
-
24.956616210937455,
|
| 820 |
-
26.577456054687445,
|
| 821 |
-
28.821977539062573,
|
| 822 |
-
30.2672265624999,
|
| 823 |
-
33.08205566406241,
|
| 824 |
-
33.05286621093751,
|
| 825 |
-
33.24584472656261,
|
| 826 |
-
36.25990722656252
|
| 827 |
-
],
|
| 828 |
-
"q_pooled": 28.11766601562499
|
| 829 |
-
},
|
| 830 |
-
"conf=0.95": {
|
| 831 |
-
"nominal_coverage": 0.95,
|
| 832 |
-
"bare_coverage_mean": 0.7476190476190475,
|
| 833 |
-
"bare_width_mean": 47.698866081237796,
|
| 834 |
-
"perhorizon_coverage_mean": 0.8738095238095237,
|
| 835 |
-
"perhorizon_width_mean": 65.5785993303571,
|
| 836 |
-
"pooled_coverage_mean": 0.8666666666666666,
|
| 837 |
-
"pooled_width_mean": 66.16411132812482,
|
| 838 |
-
"q_per_horizon": [
|
| 839 |
-
14.446508789062591,
|
| 840 |
-
15.035361328124964,
|
| 841 |
-
21.486127929687427,
|
| 842 |
-
38.963662109375036,
|
| 843 |
-
33.86973144531248,
|
| 844 |
-
34.60525878906242,
|
| 845 |
-
33.86685546874992,
|
| 846 |
-
33.722353515624945,
|
| 847 |
-
41.170214843750045,
|
| 848 |
-
36.77112792968751,
|
| 849 |
-
37.77993652343753,
|
| 850 |
-
39.08779296874991,
|
| 851 |
-
39.80886230468741,
|
| 852 |
-
38.4364013671875
|
| 853 |
-
],
|
| 854 |
-
"q_pooled": 33.08205566406241
|
| 855 |
-
}
|
| 856 |
-
}
|
| 857 |
-
}
|
| 858 |
-
},
|
| 859 |
-
"elapsed_min": 1.141351056098938
|
| 860 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"targets": [
|
| 3 |
+
"DCOILWTICO",
|
| 4 |
+
"DEXJPUS",
|
| 5 |
+
"DEXUSEU",
|
| 6 |
+
"DEXCHUS",
|
| 7 |
+
"DEXKOUS"
|
| 8 |
+
],
|
| 9 |
+
"horizon": 14,
|
| 10 |
+
"confs": [
|
| 11 |
+
0.8,
|
| 12 |
+
0.9,
|
| 13 |
+
0.95
|
| 14 |
+
],
|
| 15 |
+
"n_cal": 30,
|
| 16 |
+
"n_test": 30,
|
| 17 |
+
"results": {
|
| 18 |
+
"DCOILWTICO": {
|
| 19 |
+
"arima": {
|
| 20 |
+
"forecaster": "arima",
|
| 21 |
+
"n_cal": 30,
|
| 22 |
+
"n_test": 30,
|
| 23 |
+
"conf=0.8": {
|
| 24 |
+
"nominal_coverage": 0.8,
|
| 25 |
+
"bare_coverage_mean": 0.8095238095238094,
|
| 26 |
+
"bare_width_mean": 10.867942261555571,
|
| 27 |
+
"perhorizon_coverage_mean": 0.6857142857142856,
|
| 28 |
+
"perhorizon_width_mean": 7.990994504643288,
|
| 29 |
+
"pooled_coverage_mean": 0.6785714285714285,
|
| 30 |
+
"pooled_width_mean": 8.029568159989491,
|
| 31 |
+
"q_per_horizon": [
|
| 32 |
+
2.0917427692512547,
|
| 33 |
+
2.414564146929898,
|
| 34 |
+
3.49864771255762,
|
| 35 |
+
3.783403014989574,
|
| 36 |
+
3.6514825270864293,
|
| 37 |
+
3.410638918826429,
|
| 38 |
+
3.6483267386695672,
|
| 39 |
+
4.291356370865486,
|
| 40 |
+
4.148100512774434,
|
| 41 |
+
4.765242660767733,
|
| 42 |
+
4.798738782538393,
|
| 43 |
+
4.648753353034714,
|
| 44 |
+
5.111777984600735,
|
| 45 |
+
5.674186039610767
|
| 46 |
+
],
|
| 47 |
+
"q_pooled": 4.014784079994747
|
| 48 |
+
},
|
| 49 |
+
"conf=0.9": {
|
| 50 |
+
"nominal_coverage": 0.9,
|
| 51 |
+
"bare_coverage_mean": 0.9214285714285715,
|
| 52 |
+
"bare_width_mean": 13.948852880392929,
|
| 53 |
+
"perhorizon_coverage_mean": 0.7809523809523811,
|
| 54 |
+
"perhorizon_width_mean": 10.031165041917506,
|
| 55 |
+
"pooled_coverage_mean": 0.7738095238095238,
|
| 56 |
+
"pooled_width_mean": 10.167074585069713,
|
| 57 |
+
"q_per_horizon": [
|
| 58 |
+
2.300277140003125,
|
| 59 |
+
4.097940221459595,
|
| 60 |
+
4.076376633492892,
|
| 61 |
+
4.703831136719856,
|
| 62 |
+
4.842398951063927,
|
| 63 |
+
5.337677242975467,
|
| 64 |
+
4.359396527417836,
|
| 65 |
+
6.151868291801264,
|
| 66 |
+
5.051950062063291,
|
| 67 |
+
5.854070590337393,
|
| 68 |
+
5.368481950759772,
|
| 69 |
+
5.284114635080698,
|
| 70 |
+
6.431339982770957,
|
| 71 |
+
6.3584319274764525
|
| 72 |
+
],
|
| 73 |
+
"q_pooled": 5.0835372925348565
|
| 74 |
+
},
|
| 75 |
+
"conf=0.95": {
|
| 76 |
+
"nominal_coverage": 0.95,
|
| 77 |
+
"bare_coverage_mean": 0.9452380952380951,
|
| 78 |
+
"bare_width_mean": 16.621083373775793,
|
| 79 |
+
"perhorizon_coverage_mean": 0.9261904761904761,
|
| 80 |
+
"perhorizon_width_mean": 14.611219531249459,
|
| 81 |
+
"pooled_coverage_mean": 0.838095238095238,
|
| 82 |
+
"pooled_width_mean": 12.16250013730463,
|
| 83 |
+
"q_per_horizon": [
|
| 84 |
+
3.0531114213612582,
|
| 85 |
+
5.059338828648023,
|
| 86 |
+
5.697604686526287,
|
| 87 |
+
7.146009479872129,
|
| 88 |
+
5.3182905673299175,
|
| 89 |
+
7.39090190741959,
|
| 90 |
+
6.856329650125417,
|
| 91 |
+
7.199424687832007,
|
| 92 |
+
6.523429069811058,
|
| 93 |
+
6.548845442730201,
|
| 94 |
+
9.62406528058468,
|
| 95 |
+
8.603787092463286,
|
| 96 |
+
11.553679176235391,
|
| 97 |
+
11.703719427806988
|
| 98 |
+
],
|
| 99 |
+
"q_pooled": 6.0812500686523165
|
| 100 |
+
}
|
| 101 |
+
},
|
| 102 |
+
"chronos": {
|
| 103 |
+
"forecaster": "chronos",
|
| 104 |
+
"n_cal": 30,
|
| 105 |
+
"n_test": 30,
|
| 106 |
+
"conf=0.8": {
|
| 107 |
+
"nominal_coverage": 0.8,
|
| 108 |
+
"bare_coverage_mean": 0.7809523809523807,
|
| 109 |
+
"bare_width_mean": 11.050525585810343,
|
| 110 |
+
"perhorizon_coverage_mean": 0.6547619047619048,
|
| 111 |
+
"perhorizon_width_mean": 8.338129283360074,
|
| 112 |
+
"pooled_coverage_mean": 0.6452380952380952,
|
| 113 |
+
"pooled_width_mean": 8.036834106445315,
|
| 114 |
+
"q_per_horizon": [
|
| 115 |
+
2.1229774475097685,
|
| 116 |
+
2.4522241210937494,
|
| 117 |
+
3.261205139160154,
|
| 118 |
+
3.9071347045898435,
|
| 119 |
+
3.614091110229495,
|
| 120 |
+
3.6567034912109406,
|
| 121 |
+
3.993652496337887,
|
| 122 |
+
4.4286404418945295,
|
| 123 |
+
4.545238494873047,
|
| 124 |
+
5.274034423828127,
|
| 125 |
+
5.24025115966797,
|
| 126 |
+
4.8420919799804665,
|
| 127 |
+
5.316376342773438,
|
| 128 |
+
5.71228363037109
|
| 129 |
+
],
|
| 130 |
+
"q_pooled": 4.018417053222656
|
| 131 |
+
},
|
| 132 |
+
"conf=0.9": {
|
| 133 |
+
"nominal_coverage": 0.9,
|
| 134 |
+
"bare_coverage_mean": 0.7809523809523807,
|
| 135 |
+
"bare_width_mean": 11.050525585810343,
|
| 136 |
+
"perhorizon_coverage_mean": 0.7880952380952381,
|
| 137 |
+
"perhorizon_width_mean": 11.069673222133089,
|
| 138 |
+
"pooled_coverage_mean": 0.769047619047619,
|
| 139 |
+
"pooled_width_mean": 10.63275268554687,
|
| 140 |
+
"q_per_horizon": [
|
| 141 |
+
2.555929565429693,
|
| 142 |
+
3.5912300109863295,
|
| 143 |
+
4.3903402709960915,
|
| 144 |
+
5.24416809082031,
|
| 145 |
+
4.982480926513674,
|
| 146 |
+
5.137361450195314,
|
| 147 |
+
5.586841278076172,
|
| 148 |
+
6.765305328369138,
|
| 149 |
+
6.67245574951172,
|
| 150 |
+
5.990972595214842,
|
| 151 |
+
5.718290405273436,
|
| 152 |
+
5.943902282714845,
|
| 153 |
+
7.989523162841799,
|
| 154 |
+
6.918911437988278
|
| 155 |
+
],
|
| 156 |
+
"q_pooled": 5.316376342773438
|
| 157 |
+
},
|
| 158 |
+
"conf=0.95": {
|
| 159 |
+
"nominal_coverage": 0.95,
|
| 160 |
+
"bare_coverage_mean": 0.7809523809523807,
|
| 161 |
+
"bare_width_mean": 11.050525585810343,
|
| 162 |
+
"perhorizon_coverage_mean": 0.9261904761904761,
|
| 163 |
+
"perhorizon_width_mean": 16.372548740931915,
|
| 164 |
+
"pooled_coverage_mean": 0.8547619047619047,
|
| 165 |
+
"pooled_width_mean": 13.761851806640617,
|
| 166 |
+
"q_per_horizon": [
|
| 167 |
+
4.500623779296873,
|
| 168 |
+
5.796702575683597,
|
| 169 |
+
4.578687438964849,
|
| 170 |
+
5.983569641113277,
|
| 171 |
+
7.369260253906248,
|
| 172 |
+
8.649095764160151,
|
| 173 |
+
8.18119262695312,
|
| 174 |
+
9.151351928710938,
|
| 175 |
+
8.256888427734381,
|
| 176 |
+
8.666538696289066,
|
| 177 |
+
10.109675750732421,
|
| 178 |
+
9.065566864013675,
|
| 179 |
+
12.079234161376952,
|
| 180 |
+
12.219453277587888
|
| 181 |
+
],
|
| 182 |
+
"q_pooled": 6.8809259033203105
|
| 183 |
+
}
|
| 184 |
+
}
|
| 185 |
+
},
|
| 186 |
+
"DEXJPUS": {
|
| 187 |
+
"arima": {
|
| 188 |
+
"forecaster": "arima",
|
| 189 |
+
"n_cal": 30,
|
| 190 |
+
"n_test": 30,
|
| 191 |
+
"conf=0.8": {
|
| 192 |
+
"nominal_coverage": 0.8,
|
| 193 |
+
"bare_coverage_mean": 0.6357142857142856,
|
| 194 |
+
"bare_width_mean": 4.436568793595841,
|
| 195 |
+
"perhorizon_coverage_mean": 0.45238095238095233,
|
| 196 |
+
"perhorizon_width_mean": 2.8685092642157013,
|
| 197 |
+
"pooled_coverage_mean": 0.4928571428571428,
|
| 198 |
+
"pooled_width_mean": 2.791173769264077,
|
| 199 |
+
"q_per_horizon": [
|
| 200 |
+
0.495163456754355,
|
| 201 |
+
0.8623131555344372,
|
| 202 |
+
0.8897926642558076,
|
| 203 |
+
1.1482011742546945,
|
| 204 |
+
1.28795516679331,
|
| 205 |
+
1.6477655987067266,
|
| 206 |
+
1.7443474583408118,
|
| 207 |
+
1.5384895904415004,
|
| 208 |
+
1.803162688834604,
|
| 209 |
+
1.7685075068830685,
|
| 210 |
+
1.7186420091775432,
|
| 211 |
+
1.5470661555772267,
|
| 212 |
+
1.888659928991629,
|
| 213 |
+
1.7394982949641928
|
| 214 |
+
],
|
| 215 |
+
"q_pooled": 1.3955868846320385
|
| 216 |
+
},
|
| 217 |
+
"conf=0.9": {
|
| 218 |
+
"nominal_coverage": 0.9,
|
| 219 |
+
"bare_coverage_mean": 0.7738095238095236,
|
| 220 |
+
"bare_width_mean": 5.694274399535953,
|
| 221 |
+
"perhorizon_coverage_mean": 0.5761904761904761,
|
| 222 |
+
"perhorizon_width_mean": 3.798189452444865,
|
| 223 |
+
"pooled_coverage_mean": 0.5809523809523809,
|
| 224 |
+
"pooled_width_mean": 3.8189608293080823,
|
| 225 |
+
"q_per_horizon": [
|
| 226 |
+
0.602618663621783,
|
| 227 |
+
1.5464872564533323,
|
| 228 |
+
1.410577522130609,
|
| 229 |
+
2.006457013067674,
|
| 230 |
+
1.9326982798289691,
|
| 231 |
+
1.871741039728505,
|
| 232 |
+
1.8724724170933484,
|
| 233 |
+
2.0184353738183205,
|
| 234 |
+
2.057205707305812,
|
| 235 |
+
2.300998677577681,
|
| 236 |
+
2.4584763121956854,
|
| 237 |
+
2.2610349692604643,
|
| 238 |
+
2.141044083930069,
|
| 239 |
+
2.1070788511018037
|
| 240 |
+
],
|
| 241 |
+
"q_pooled": 1.9094804146540412
|
| 242 |
+
},
|
| 243 |
+
"conf=0.95": {
|
| 244 |
+
"nominal_coverage": 0.95,
|
| 245 |
+
"bare_coverage_mean": 0.8738095238095237,
|
| 246 |
+
"bare_width_mean": 6.7851464460479765,
|
| 247 |
+
"perhorizon_coverage_mean": 0.8023809523809523,
|
| 248 |
+
"perhorizon_width_mean": 6.101635459825262,
|
| 249 |
+
"pooled_coverage_mean": 0.6571428571428571,
|
| 250 |
+
"pooled_width_mean": 4.601997355155362,
|
| 251 |
+
"q_per_horizon": [
|
| 252 |
+
0.9380858484970958,
|
| 253 |
+
2.323515167056655,
|
| 254 |
+
1.946219636173069,
|
| 255 |
+
2.2116051075864647,
|
| 256 |
+
2.7206754280723686,
|
| 257 |
+
3.562227529556367,
|
| 258 |
+
3.502961358052417,
|
| 259 |
+
3.5922479170316564,
|
| 260 |
+
4.142317883234554,
|
| 261 |
+
4.062380770386838,
|
| 262 |
+
3.5722844723094056,
|
| 263 |
+
3.2623018774721544,
|
| 264 |
+
3.212317495709044,
|
| 265 |
+
3.6623077276387335
|
| 266 |
+
],
|
| 267 |
+
"q_pooled": 2.300998677577681
|
| 268 |
+
}
|
| 269 |
+
},
|
| 270 |
+
"chronos": {
|
| 271 |
+
"forecaster": "chronos",
|
| 272 |
+
"n_cal": 30,
|
| 273 |
+
"n_test": 30,
|
| 274 |
+
"conf=0.8": {
|
| 275 |
+
"nominal_coverage": 0.8,
|
| 276 |
+
"bare_coverage_mean": 0.7309523809523808,
|
| 277 |
+
"bare_width_mean": 5.977349718411763,
|
| 278 |
+
"perhorizon_coverage_mean": 0.47380952380952385,
|
| 279 |
+
"perhorizon_width_mean": 3.038026166643411,
|
| 280 |
+
"pooled_coverage_mean": 0.49761904761904757,
|
| 281 |
+
"pooled_width_mean": 2.8918725585937466,
|
| 282 |
+
"q_per_horizon": [
|
| 283 |
+
0.5868325805664085,
|
| 284 |
+
0.8268566894531233,
|
| 285 |
+
0.8645288085937466,
|
| 286 |
+
1.1490182495117125,
|
| 287 |
+
1.4187112426757835,
|
| 288 |
+
1.667842102050784,
|
| 289 |
+
1.8516342163085966,
|
| 290 |
+
1.6831582641601557,
|
| 291 |
+
1.5933966064453102,
|
| 292 |
+
1.7942288208007824,
|
| 293 |
+
2.1771484374999943,
|
| 294 |
+
1.8165200805664057,
|
| 295 |
+
1.8638430786132858,
|
| 296 |
+
1.9724639892578182
|
| 297 |
+
],
|
| 298 |
+
"q_pooled": 1.4459362792968733
|
| 299 |
+
},
|
| 300 |
+
"conf=0.9": {
|
| 301 |
+
"nominal_coverage": 0.9,
|
| 302 |
+
"bare_coverage_mean": 0.7309523809523808,
|
| 303 |
+
"bare_width_mean": 5.977349718411763,
|
| 304 |
+
"perhorizon_coverage_mean": 0.6071428571428572,
|
| 305 |
+
"perhorizon_width_mean": 4.111253226143984,
|
| 306 |
+
"pooled_coverage_mean": 0.6023809523809524,
|
| 307 |
+
"pooled_width_mean": 4.0517645263671795,
|
| 308 |
+
"q_per_horizon": [
|
| 309 |
+
0.7398001098632818,
|
| 310 |
+
1.542530517578129,
|
| 311 |
+
1.4136145019531199,
|
| 312 |
+
2.0581530761718767,
|
| 313 |
+
1.8112579345703068,
|
| 314 |
+
2.3215438842773466,
|
| 315 |
+
2.0993005371093716,
|
| 316 |
+
2.064953918457036,
|
| 317 |
+
2.4423132324218813,
|
| 318 |
+
2.698671264648439,
|
| 319 |
+
2.4562600708007807,
|
| 320 |
+
2.32724975585937,
|
| 321 |
+
2.5256872558593813,
|
| 322 |
+
2.277436523437501
|
| 323 |
+
],
|
| 324 |
+
"q_pooled": 2.0258822631835898
|
| 325 |
+
},
|
| 326 |
+
"conf=0.95": {
|
| 327 |
+
"nominal_coverage": 0.95,
|
| 328 |
+
"bare_coverage_mean": 0.7309523809523808,
|
| 329 |
+
"bare_width_mean": 5.977349718411763,
|
| 330 |
+
"perhorizon_coverage_mean": 0.7190476190476188,
|
| 331 |
+
"perhorizon_width_mean": 5.96463936941964,
|
| 332 |
+
"pooled_coverage_mean": 0.6809523809523809,
|
| 333 |
+
"pooled_width_mean": 5.0513745117187625,
|
| 334 |
+
"q_per_horizon": [
|
| 335 |
+
0.930439453125004,
|
| 336 |
+
2.665478515624997,
|
| 337 |
+
1.9302044677734358,
|
| 338 |
+
2.0884591674804653,
|
| 339 |
+
2.7411437988281193,
|
| 340 |
+
3.6284613037109352,
|
| 341 |
+
3.513445739746089,
|
| 342 |
+
3.5274569702148426,
|
| 343 |
+
4.001575012207027,
|
| 344 |
+
3.9003729248046852,
|
| 345 |
+
3.2779876708984403,
|
| 346 |
+
3.0333639526367193,
|
| 347 |
+
3.0030249023437534,
|
| 348 |
+
3.511061706542975
|
| 349 |
+
],
|
| 350 |
+
"q_pooled": 2.5256872558593813
|
| 351 |
+
}
|
| 352 |
+
}
|
| 353 |
+
},
|
| 354 |
+
"DEXUSEU": {
|
| 355 |
+
"arima": {
|
| 356 |
+
"forecaster": "arima",
|
| 357 |
+
"n_cal": 30,
|
| 358 |
+
"n_test": 30,
|
| 359 |
+
"conf=0.8": {
|
| 360 |
+
"nominal_coverage": 0.8,
|
| 361 |
+
"bare_coverage_mean": 0.8595238095238095,
|
| 362 |
+
"bare_width_mean": 0.037255051394705835,
|
| 363 |
+
"perhorizon_coverage_mean": 0.811904761904762,
|
| 364 |
+
"perhorizon_width_mean": 0.03243267317446737,
|
| 365 |
+
"pooled_coverage_mean": 0.8166666666666665,
|
| 366 |
+
"pooled_width_mean": 0.031645107249388627,
|
| 367 |
+
"q_per_horizon": [
|
| 368 |
+
0.006537154478817753,
|
| 369 |
+
0.007333177556922088,
|
| 370 |
+
0.012312774872748289,
|
| 371 |
+
0.014043924961390397,
|
| 372 |
+
0.016017799097016727,
|
| 373 |
+
0.015644421534730224,
|
| 374 |
+
0.016336252170641608,
|
| 375 |
+
0.016122979608933496,
|
| 376 |
+
0.01964457489050009,
|
| 377 |
+
0.02072169154979453,
|
| 378 |
+
0.024118006869554565,
|
| 379 |
+
0.018656617879449167,
|
| 380 |
+
0.017769218599013037,
|
| 381 |
+
0.021770118151759554
|
| 382 |
+
],
|
| 383 |
+
"q_pooled": 0.015822553624694313
|
| 384 |
+
},
|
| 385 |
+
"conf=0.9": {
|
| 386 |
+
"nominal_coverage": 0.9,
|
| 387 |
+
"bare_coverage_mean": 0.9142857142857144,
|
| 388 |
+
"bare_width_mean": 0.047816340798432555,
|
| 389 |
+
"perhorizon_coverage_mean": 0.8904761904761905,
|
| 390 |
+
"perhorizon_width_mean": 0.04285578362084427,
|
| 391 |
+
"pooled_coverage_mean": 0.8809523809523809,
|
| 392 |
+
"pooled_width_mean": 0.041073044538626924,
|
| 393 |
+
"q_per_horizon": [
|
| 394 |
+
0.006761841674864266,
|
| 395 |
+
0.01182171512244512,
|
| 396 |
+
0.015822553624694313,
|
| 397 |
+
0.02093465874643763,
|
| 398 |
+
0.019889187414578124,
|
| 399 |
+
0.01963882946285489,
|
| 400 |
+
0.02190089656490879,
|
| 401 |
+
0.021692702530445862,
|
| 402 |
+
0.024590684771490512,
|
| 403 |
+
0.024756601121440625,
|
| 404 |
+
0.02609594060524123,
|
| 405 |
+
0.02889462135779275,
|
| 406 |
+
0.02689529861576956,
|
| 407 |
+
0.030294953732946217
|
| 408 |
+
],
|
| 409 |
+
"q_pooled": 0.020536522269313462
|
| 410 |
+
},
|
| 411 |
+
"conf=0.95": {
|
| 412 |
+
"nominal_coverage": 0.95,
|
| 413 |
+
"bare_coverage_mean": 0.9380952380952381,
|
| 414 |
+
"bare_width_mean": 0.05697668430905675,
|
| 415 |
+
"perhorizon_coverage_mean": 0.9404761904761906,
|
| 416 |
+
"perhorizon_width_mean": 0.05919364307194989,
|
| 417 |
+
"pooled_coverage_mean": 0.9119047619047618,
|
| 418 |
+
"pooled_width_mean": 0.05176715217769701,
|
| 419 |
+
"q_per_horizon": [
|
| 420 |
+
0.011752772972313252,
|
| 421 |
+
0.01247253748338717,
|
| 422 |
+
0.01748801536532918,
|
| 423 |
+
0.02383577073487353,
|
| 424 |
+
0.02364315675893547,
|
| 425 |
+
0.02218707632552186,
|
| 426 |
+
0.03203504055001494,
|
| 427 |
+
0.030332454296178923,
|
| 428 |
+
0.03750274950896193,
|
| 429 |
+
0.03613221732608629,
|
| 430 |
+
0.039232376756770826,
|
| 431 |
+
0.04010448928765342,
|
| 432 |
+
0.04080440634480942,
|
| 433 |
+
0.046832437792812875
|
| 434 |
+
],
|
| 435 |
+
"q_pooled": 0.025883576088848503
|
| 436 |
+
}
|
| 437 |
+
},
|
| 438 |
+
"chronos": {
|
| 439 |
+
"forecaster": "chronos",
|
| 440 |
+
"n_cal": 30,
|
| 441 |
+
"n_test": 30,
|
| 442 |
+
"conf=0.8": {
|
| 443 |
+
"nominal_coverage": 0.8,
|
| 444 |
+
"bare_coverage_mean": 0.8,
|
| 445 |
+
"bare_width_mean": 0.03301220412055651,
|
| 446 |
+
"perhorizon_coverage_mean": 0.8071428571428574,
|
| 447 |
+
"perhorizon_width_mean": 0.03432217042105538,
|
| 448 |
+
"pooled_coverage_mean": 0.8000000000000002,
|
| 449 |
+
"pooled_width_mean": 0.03300358161926287,
|
| 450 |
+
"q_per_horizon": [
|
| 451 |
+
0.004584144783019939,
|
| 452 |
+
0.007060681152343706,
|
| 453 |
+
0.01243185882568354,
|
| 454 |
+
0.01602103652954101,
|
| 455 |
+
0.01641003990173351,
|
| 456 |
+
0.015545682907104563,
|
| 457 |
+
0.018368010711669935,
|
| 458 |
+
0.01898662319183342,
|
| 459 |
+
0.022148969459533596,
|
| 460 |
+
0.02255078582763681,
|
| 461 |
+
0.023978458976745554,
|
| 462 |
+
0.020319693946838413,
|
| 463 |
+
0.017313012123107985,
|
| 464 |
+
0.024536194610595752
|
| 465 |
+
],
|
| 466 |
+
"q_pooled": 0.016501790809631434
|
| 467 |
+
},
|
| 468 |
+
"conf=0.9": {
|
| 469 |
+
"nominal_coverage": 0.9,
|
| 470 |
+
"bare_coverage_mean": 0.8,
|
| 471 |
+
"bare_width_mean": 0.03301220412055651,
|
| 472 |
+
"perhorizon_coverage_mean": 0.9190476190476191,
|
| 473 |
+
"perhorizon_width_mean": 0.05077633157457622,
|
| 474 |
+
"pooled_coverage_mean": 0.8904761904761905,
|
| 475 |
+
"pooled_width_mean": 0.04548504829406719,
|
| 476 |
+
"q_per_horizon": [
|
| 477 |
+
0.008554865837097081,
|
| 478 |
+
0.00971177463531503,
|
| 479 |
+
0.01530143814086915,
|
| 480 |
+
0.01911055355072011,
|
| 481 |
+
0.01780367832183849,
|
| 482 |
+
0.021554478836059543,
|
| 483 |
+
0.026538812255859412,
|
| 484 |
+
0.027544754409789984,
|
| 485 |
+
0.028936708450317372,
|
| 486 |
+
0.03478273067474369,
|
| 487 |
+
0.0382537099838256,
|
| 488 |
+
0.03136329650878911,
|
| 489 |
+
0.0327265468597413,
|
| 490 |
+
0.04325097255706778
|
| 491 |
+
],
|
| 492 |
+
"q_pooled": 0.022742524147033594
|
| 493 |
+
},
|
| 494 |
+
"conf=0.95": {
|
| 495 |
+
"nominal_coverage": 0.95,
|
| 496 |
+
"bare_coverage_mean": 0.8,
|
| 497 |
+
"bare_width_mean": 0.03301220412055651,
|
| 498 |
+
"perhorizon_coverage_mean": 0.9404761904761905,
|
| 499 |
+
"perhorizon_width_mean": 0.0633313385554722,
|
| 500 |
+
"pooled_coverage_mean": 0.9547619047619046,
|
| 501 |
+
"pooled_width_mean": 0.06135401725769052,
|
| 502 |
+
"q_per_horizon": [
|
| 503 |
+
0.011944815063476666,
|
| 504 |
+
0.01392391796112058,
|
| 505 |
+
0.017532272148132355,
|
| 506 |
+
0.022742524147033594,
|
| 507 |
+
0.02558988399505613,
|
| 508 |
+
0.02623647480010982,
|
| 509 |
+
0.03067700862884526,
|
| 510 |
+
0.034072942352294966,
|
| 511 |
+
0.04179227085113535,
|
| 512 |
+
0.0389519283294677,
|
| 513 |
+
0.042779201126098565,
|
| 514 |
+
0.04429976444244388,
|
| 515 |
+
0.044917986869811966,
|
| 516 |
+
0.04785837917327873
|
| 517 |
+
],
|
| 518 |
+
"q_pooled": 0.03067700862884526
|
| 519 |
+
}
|
| 520 |
+
}
|
| 521 |
+
},
|
| 522 |
+
"DEXCHUS": {
|
| 523 |
+
"arima": {
|
| 524 |
+
"forecaster": "arima",
|
| 525 |
+
"n_cal": 30,
|
| 526 |
+
"n_test": 30,
|
| 527 |
+
"conf=0.8": {
|
| 528 |
+
"nominal_coverage": 0.8,
|
| 529 |
+
"bare_coverage_mean": 0.8309523809523809,
|
| 530 |
+
"bare_width_mean": 0.12023258914287749,
|
| 531 |
+
"perhorizon_coverage_mean": 0.8,
|
| 532 |
+
"perhorizon_width_mean": 0.10379373004234645,
|
| 533 |
+
"pooled_coverage_mean": 0.7833333333333333,
|
| 534 |
+
"pooled_width_mean": 0.0905579673492376,
|
| 535 |
+
"q_per_horizon": [
|
| 536 |
+
0.01913552539082275,
|
| 537 |
+
0.021503803498270635,
|
| 538 |
+
0.03202273363733443,
|
| 539 |
+
0.04471228016293516,
|
| 540 |
+
0.04595743067166769,
|
| 541 |
+
0.057142529866381686,
|
| 542 |
+
0.041567074905930035,
|
| 543 |
+
0.05922440211999547,
|
| 544 |
+
0.06055238630005544,
|
| 545 |
+
0.06195863987337091,
|
| 546 |
+
0.07735612435271388,
|
| 547 |
+
0.07482211423245033,
|
| 548 |
+
0.0613510301071134,
|
| 549 |
+
0.06925003517738304
|
| 550 |
+
],
|
| 551 |
+
"q_pooled": 0.0452789836746188
|
| 552 |
+
},
|
| 553 |
+
"conf=0.9": {
|
| 554 |
+
"nominal_coverage": 0.9,
|
| 555 |
+
"bare_coverage_mean": 0.8761904761904763,
|
| 556 |
+
"bare_width_mean": 0.1543168575080998,
|
| 557 |
+
"perhorizon_coverage_mean": 0.8857142857142858,
|
| 558 |
+
"perhorizon_width_mean": 0.1694623051285068,
|
| 559 |
+
"pooled_coverage_mean": 0.8833333333333333,
|
| 560 |
+
"pooled_width_mean": 0.14964422846490066,
|
| 561 |
+
"q_per_horizon": [
|
| 562 |
+
0.026065770883445083,
|
| 563 |
+
0.03663070092160048,
|
| 564 |
+
0.04814005922096687,
|
| 565 |
+
0.05434837199719045,
|
| 566 |
+
0.06341843160370875,
|
| 567 |
+
0.06742875148755179,
|
| 568 |
+
0.08909509445192665,
|
| 569 |
+
0.09169474000207156,
|
| 570 |
+
0.11607218346504666,
|
| 571 |
+
0.12686121412365825,
|
| 572 |
+
0.11025109977698122,
|
| 573 |
+
0.12555183014476246,
|
| 574 |
+
0.11555182580724122,
|
| 575 |
+
0.11512606201339626
|
| 576 |
+
],
|
| 577 |
+
"q_pooled": 0.07482211423245033
|
| 578 |
+
},
|
| 579 |
+
"conf=0.95": {
|
| 580 |
+
"nominal_coverage": 0.95,
|
| 581 |
+
"bare_coverage_mean": 0.9142857142857144,
|
| 582 |
+
"bare_width_mean": 0.18387987719237844,
|
| 583 |
+
"perhorizon_coverage_mean": 0.9523809523809524,
|
| 584 |
+
"perhorizon_width_mean": 0.2451580685008066,
|
| 585 |
+
"pooled_coverage_mean": 0.9285714285714286,
|
| 586 |
+
"pooled_width_mean": 0.22228302327474836,
|
| 587 |
+
"q_per_horizon": [
|
| 588 |
+
0.032681838125458995,
|
| 589 |
+
0.07173662444320072,
|
| 590 |
+
0.06519382424998543,
|
| 591 |
+
0.06079908928748701,
|
| 592 |
+
0.09872806564422376,
|
| 593 |
+
0.10867467864500302,
|
| 594 |
+
0.11114151163737418,
|
| 595 |
+
0.14390234892072673,
|
| 596 |
+
0.14109477023066574,
|
| 597 |
+
0.1721305319733375,
|
| 598 |
+
0.17782669739203882,
|
| 599 |
+
0.18559857212707964,
|
| 600 |
+
0.17849914242157627,
|
| 601 |
+
0.16809878440748793
|
| 602 |
+
],
|
| 603 |
+
"q_pooled": 0.11114151163737418
|
| 604 |
+
}
|
| 605 |
+
},
|
| 606 |
+
"chronos": {
|
| 607 |
+
"forecaster": "chronos",
|
| 608 |
+
"n_cal": 30,
|
| 609 |
+
"n_test": 30,
|
| 610 |
+
"conf=0.8": {
|
| 611 |
+
"nominal_coverage": 0.8,
|
| 612 |
+
"bare_coverage_mean": 0.8428571428571429,
|
| 613 |
+
"bare_width_mean": 0.11959348532060782,
|
| 614 |
+
"perhorizon_coverage_mean": 0.7833333333333333,
|
| 615 |
+
"perhorizon_width_mean": 0.10019261191231878,
|
| 616 |
+
"pooled_coverage_mean": 0.8,
|
| 617 |
+
"pooled_width_mean": 0.09779591979980395,
|
| 618 |
+
"q_per_horizon": [
|
| 619 |
+
0.025188607788085626,
|
| 620 |
+
0.02532754745483423,
|
| 621 |
+
0.03890764770507804,
|
| 622 |
+
0.043802440643310625,
|
| 623 |
+
0.04915690460205102,
|
| 624 |
+
0.04680775070190446,
|
| 625 |
+
0.03916668243408239,
|
| 626 |
+
0.04809946746826199,
|
| 627 |
+
0.0576093139648437,
|
| 628 |
+
0.06108116531372065,
|
| 629 |
+
0.05864996337890638,
|
| 630 |
+
0.06179137878417951,
|
| 631 |
+
0.0701272941589357,
|
| 632 |
+
0.0756321189880369
|
| 633 |
+
],
|
| 634 |
+
"q_pooled": 0.04889795989990198
|
| 635 |
+
},
|
| 636 |
+
"conf=0.9": {
|
| 637 |
+
"nominal_coverage": 0.9,
|
| 638 |
+
"bare_coverage_mean": 0.8428571428571429,
|
| 639 |
+
"bare_width_mean": 0.11959348532060782,
|
| 640 |
+
"perhorizon_coverage_mean": 0.869047619047619,
|
| 641 |
+
"perhorizon_width_mean": 0.16607914559500545,
|
| 642 |
+
"pooled_coverage_mean": 0.861904761904762,
|
| 643 |
+
"pooled_width_mean": 0.1402545883178714,
|
| 644 |
+
"q_per_horizon": [
|
| 645 |
+
0.030081840515136626,
|
| 646 |
+
0.04935519256591814,
|
| 647 |
+
0.046391881561278936,
|
| 648 |
+
0.050782734680176134,
|
| 649 |
+
0.06024611434936489,
|
| 650 |
+
0.06782592163085965,
|
| 651 |
+
0.08113353042602522,
|
| 652 |
+
0.09840077590942364,
|
| 653 |
+
0.11880251922607421,
|
| 654 |
+
0.12758038635253932,
|
| 655 |
+
0.10697886581420857,
|
| 656 |
+
0.12221163177490268,
|
| 657 |
+
0.10586601409912078,
|
| 658 |
+
0.09689661026000973
|
| 659 |
+
],
|
| 660 |
+
"q_pooled": 0.0701272941589357
|
| 661 |
+
},
|
| 662 |
+
"conf=0.95": {
|
| 663 |
+
"nominal_coverage": 0.95,
|
| 664 |
+
"bare_coverage_mean": 0.8428571428571429,
|
| 665 |
+
"bare_width_mean": 0.11959348532060782,
|
| 666 |
+
"perhorizon_coverage_mean": 0.9214285714285714,
|
| 667 |
+
"perhorizon_width_mean": 0.22292400338309162,
|
| 668 |
+
"pooled_coverage_mean": 0.9095238095238095,
|
| 669 |
+
"pooled_width_mean": 0.2085365203857421,
|
| 670 |
+
"q_per_horizon": [
|
| 671 |
+
0.03159678268432575,
|
| 672 |
+
0.07481312255859418,
|
| 673 |
+
0.07034568023681675,
|
| 674 |
+
0.05222851562499997,
|
| 675 |
+
0.070854161071777,
|
| 676 |
+
0.09303555068969693,
|
| 677 |
+
0.08751402359008775,
|
| 678 |
+
0.13737474822998053,
|
| 679 |
+
0.1317485343933109,
|
| 680 |
+
0.15814713668823277,
|
| 681 |
+
0.1641494514465336,
|
| 682 |
+
0.1720175582885739,
|
| 683 |
+
0.16296061859130884,
|
| 684 |
+
0.15368213958740196
|
| 685 |
+
],
|
| 686 |
+
"q_pooled": 0.10426826019287105
|
| 687 |
+
}
|
| 688 |
+
}
|
| 689 |
+
},
|
| 690 |
+
"DEXKOUS": {
|
| 691 |
+
"arima": {
|
| 692 |
+
"forecaster": "arima",
|
| 693 |
+
"n_cal": 30,
|
| 694 |
+
"n_test": 30,
|
| 695 |
+
"conf=0.8": {
|
| 696 |
+
"nominal_coverage": 0.8,
|
| 697 |
+
"bare_coverage_mean": 0.7071428571428572,
|
| 698 |
+
"bare_width_mean": 41.40702231782995,
|
| 699 |
+
"perhorizon_coverage_mean": 0.6809523809523808,
|
| 700 |
+
"perhorizon_width_mean": 40.33834903476961,
|
| 701 |
+
"pooled_coverage_mean": 0.738095238095238,
|
| 702 |
+
"pooled_width_mean": 40.174430225697506,
|
| 703 |
+
"q_per_horizon": [
|
| 704 |
+
6.019828757339383,
|
| 705 |
+
9.23651622262787,
|
| 706 |
+
11.885457212575375,
|
| 707 |
+
14.301239776206785,
|
| 708 |
+
16.538830978627857,
|
| 709 |
+
21.11794087612452,
|
| 710 |
+
21.007107424806236,
|
| 711 |
+
22.089443667480282,
|
| 712 |
+
22.26134568228099,
|
| 713 |
+
25.115703414253176,
|
| 714 |
+
26.282158971560648,
|
| 715 |
+
28.31230917980338,
|
| 716 |
+
28.622331265376488,
|
| 717 |
+
29.57822981432423
|
| 718 |
+
],
|
| 719 |
+
"q_pooled": 20.087215112848753
|
| 720 |
+
},
|
| 721 |
+
"conf=0.9": {
|
| 722 |
+
"nominal_coverage": 0.9,
|
| 723 |
+
"bare_coverage_mean": 0.8023809523809522,
|
| 724 |
+
"bare_width_mean": 53.145337785764546,
|
| 725 |
+
"perhorizon_coverage_mean": 0.7476190476190475,
|
| 726 |
+
"perhorizon_width_mean": 47.514067959856646,
|
| 727 |
+
"pooled_coverage_mean": 0.8166666666666665,
|
| 728 |
+
"pooled_width_mean": 51.703697664495394,
|
| 729 |
+
"q_per_horizon": [
|
| 730 |
+
7.042854649616629,
|
| 731 |
+
11.217728114270585,
|
| 732 |
+
13.051289508962782,
|
| 733 |
+
17.974908318198914,
|
| 734 |
+
22.696578397519033,
|
| 735 |
+
24.786648186653792,
|
| 736 |
+
23.205692899009136,
|
| 737 |
+
25.439228843483306,
|
| 738 |
+
28.745883742858496,
|
| 739 |
+
27.649073917800933,
|
| 740 |
+
32.25531441260455,
|
| 741 |
+
33.39915882237847,
|
| 742 |
+
32.317174372199815,
|
| 743 |
+
32.81694153344006
|
| 744 |
+
],
|
| 745 |
+
"q_pooled": 25.851848832247697
|
| 746 |
+
},
|
| 747 |
+
"conf=0.95": {
|
| 748 |
+
"nominal_coverage": 0.95,
|
| 749 |
+
"bare_coverage_mean": 0.8952380952380953,
|
| 750 |
+
"bare_width_mean": 63.326575872509096,
|
| 751 |
+
"perhorizon_coverage_mean": 0.8833333333333332,
|
| 752 |
+
"perhorizon_width_mean": 62.3317263081943,
|
| 753 |
+
"pooled_coverage_mean": 0.861904761904762,
|
| 754 |
+
"pooled_width_mean": 63.003314010262784,
|
| 755 |
+
"q_per_horizon": [
|
| 756 |
+
12.416104342710696,
|
| 757 |
+
13.332090802595758,
|
| 758 |
+
20.658854986845654,
|
| 759 |
+
37.144614564726226,
|
| 760 |
+
31.230195571947434,
|
| 761 |
+
31.501657005131392,
|
| 762 |
+
31.466225645210898,
|
| 763 |
+
32.67178752649829,
|
| 764 |
+
41.05990019882688,
|
| 765 |
+
37.85425421989498,
|
| 766 |
+
37.08859079038166,
|
| 767 |
+
35.26046070337611,
|
| 768 |
+
40.538744747242845,
|
| 769 |
+
34.098603051971395
|
| 770 |
+
],
|
| 771 |
+
"q_pooled": 31.501657005131392
|
| 772 |
+
}
|
| 773 |
+
},
|
| 774 |
+
"chronos": {
|
| 775 |
+
"forecaster": "chronos",
|
| 776 |
+
"n_cal": 30,
|
| 777 |
+
"n_test": 30,
|
| 778 |
+
"conf=0.8": {
|
| 779 |
+
"nominal_coverage": 0.8,
|
| 780 |
+
"bare_coverage_mean": 0.7476190476190475,
|
| 781 |
+
"bare_width_mean": 47.698866081237796,
|
| 782 |
+
"perhorizon_coverage_mean": 0.669047619047619,
|
| 783 |
+
"perhorizon_width_mean": 42.05718540736606,
|
| 784 |
+
"pooled_coverage_mean": 0.7452380952380951,
|
| 785 |
+
"pooled_width_mean": 43.94189453125,
|
| 786 |
+
"q_per_horizon": [
|
| 787 |
+
6.6086572265624,
|
| 788 |
+
8.688681640624964,
|
| 789 |
+
11.395966796874973,
|
| 790 |
+
12.880576171874964,
|
| 791 |
+
17.0732275390626,
|
| 792 |
+
19.5968017578125,
|
| 793 |
+
19.40576171875,
|
| 794 |
+
24.150083007812555,
|
| 795 |
+
24.586870117187573,
|
| 796 |
+
26.251137695312536,
|
| 797 |
+
27.594218749999982,
|
| 798 |
+
32.349785156249936,
|
| 799 |
+
31.7150732421876,
|
| 800 |
+
32.103457031249945
|
| 801 |
+
],
|
| 802 |
+
"q_pooled": 21.970947265625
|
| 803 |
+
},
|
| 804 |
+
"conf=0.9": {
|
| 805 |
+
"nominal_coverage": 0.9,
|
| 806 |
+
"bare_coverage_mean": 0.7476190476190475,
|
| 807 |
+
"bare_width_mean": 47.698866081237796,
|
| 808 |
+
"perhorizon_coverage_mean": 0.7714285714285712,
|
| 809 |
+
"perhorizon_width_mean": 49.80674665178569,
|
| 810 |
+
"pooled_coverage_mean": 0.8357142857142856,
|
| 811 |
+
"pooled_width_mean": 56.23533203124998,
|
| 812 |
+
"q_per_horizon": [
|
| 813 |
+
8.360268554687536,
|
| 814 |
+
12.467915039062518,
|
| 815 |
+
14.159082031249909,
|
| 816 |
+
18.2329248046874,
|
| 817 |
+
23.688662109374945,
|
| 818 |
+
25.474423828125055,
|
| 819 |
+
24.956616210937455,
|
| 820 |
+
26.577456054687445,
|
| 821 |
+
28.821977539062573,
|
| 822 |
+
30.2672265624999,
|
| 823 |
+
33.08205566406241,
|
| 824 |
+
33.05286621093751,
|
| 825 |
+
33.24584472656261,
|
| 826 |
+
36.25990722656252
|
| 827 |
+
],
|
| 828 |
+
"q_pooled": 28.11766601562499
|
| 829 |
+
},
|
| 830 |
+
"conf=0.95": {
|
| 831 |
+
"nominal_coverage": 0.95,
|
| 832 |
+
"bare_coverage_mean": 0.7476190476190475,
|
| 833 |
+
"bare_width_mean": 47.698866081237796,
|
| 834 |
+
"perhorizon_coverage_mean": 0.8738095238095237,
|
| 835 |
+
"perhorizon_width_mean": 65.5785993303571,
|
| 836 |
+
"pooled_coverage_mean": 0.8666666666666666,
|
| 837 |
+
"pooled_width_mean": 66.16411132812482,
|
| 838 |
+
"q_per_horizon": [
|
| 839 |
+
14.446508789062591,
|
| 840 |
+
15.035361328124964,
|
| 841 |
+
21.486127929687427,
|
| 842 |
+
38.963662109375036,
|
| 843 |
+
33.86973144531248,
|
| 844 |
+
34.60525878906242,
|
| 845 |
+
33.86685546874992,
|
| 846 |
+
33.722353515624945,
|
| 847 |
+
41.170214843750045,
|
| 848 |
+
36.77112792968751,
|
| 849 |
+
37.77993652343753,
|
| 850 |
+
39.08779296874991,
|
| 851 |
+
39.80886230468741,
|
| 852 |
+
38.4364013671875
|
| 853 |
+
],
|
| 854 |
+
"q_pooled": 33.08205566406241
|
| 855 |
+
}
|
| 856 |
+
}
|
| 857 |
+
}
|
| 858 |
+
},
|
| 859 |
+
"elapsed_min": 1.141351056098938
|
| 860 |
}
|
FINAL_SUBMIT/receipts/R6_GETHSEMANE.json
CHANGED
|
@@ -1,122 +1,122 @@
|
|
| 1 |
-
{
|
| 2 |
-
"tasks": {
|
| 3 |
-
"easy_typhoon_response": {
|
| 4 |
-
"ppo_v3": {
|
| 5 |
-
"policy": "ppo_v3",
|
| 6 |
-
"n_episodes": 50,
|
| 7 |
-
"reward_mean": 1.2005000000000001,
|
| 8 |
-
"reward_std": 0.19939637032804786,
|
| 9 |
-
"reward_min": 0.643,
|
| 10 |
-
"reward_max": 1.3435000000000004,
|
| 11 |
-
"length_mean": 20.0,
|
| 12 |
-
"violations_mean": 0.0,
|
| 13 |
-
"violations_max": 0,
|
| 14 |
-
"train_time_s": 389.36543345451355,
|
| 15 |
-
"total_timesteps": 100000
|
| 16 |
-
},
|
| 17 |
-
"random": {
|
| 18 |
-
"policy": "random",
|
| 19 |
-
"n_episodes": 50,
|
| 20 |
-
"reward_mean": 0.7797316807490356,
|
| 21 |
-
"reward_std": 0.12419262667905032,
|
| 22 |
-
"reward_min": 0.5059697476286091,
|
| 23 |
-
"reward_max": 1.009169047501108,
|
| 24 |
-
"length_mean": 20.0,
|
| 25 |
-
"violations_mean": 0.0,
|
| 26 |
-
"violations_max": 0
|
| 27 |
-
},
|
| 28 |
-
"greedy": {
|
| 29 |
-
"policy": "greedy",
|
| 30 |
-
"n_episodes": 50,
|
| 31 |
-
"reward_mean": 0.9803400000000001,
|
| 32 |
-
"reward_std": 0.0062695215128429176,
|
| 33 |
-
"reward_min": 0.964,
|
| 34 |
-
"reward_max": 0.9894999999999999,
|
| 35 |
-
"length_mean": 20.0,
|
| 36 |
-
"violations_mean": 0.0,
|
| 37 |
-
"violations_max": 0
|
| 38 |
-
}
|
| 39 |
-
},
|
| 40 |
-
"medium_multi_front": {
|
| 41 |
-
"ppo_v3": {
|
| 42 |
-
"policy": "ppo_v3",
|
| 43 |
-
"n_episodes": 50,
|
| 44 |
-
"reward_mean": 2.774816094381805,
|
| 45 |
-
"reward_std": 0.2510891195507745,
|
| 46 |
-
"reward_min": 2.2131947145395343,
|
| 47 |
-
"reward_max": 3.1306422226861352,
|
| 48 |
-
"length_mean": 44.76,
|
| 49 |
-
"violations_mean": 0.0,
|
| 50 |
-
"violations_max": 0,
|
| 51 |
-
"train_time_s": 1028.4124627113342,
|
| 52 |
-
"total_timesteps": 100000
|
| 53 |
-
},
|
| 54 |
-
"random": {
|
| 55 |
-
"policy": "random",
|
| 56 |
-
"n_episodes": 50,
|
| 57 |
-
"reward_mean": -1.1101909893619986,
|
| 58 |
-
"reward_std": 0.8109045133638636,
|
| 59 |
-
"reward_min": -2.3839605638376136,
|
| 60 |
-
"reward_max": 0.6624458826285525,
|
| 61 |
-
"length_mean": 44.84,
|
| 62 |
-
"violations_mean": 0.0,
|
| 63 |
-
"violations_max": 0
|
| 64 |
-
},
|
| 65 |
-
"greedy": {
|
| 66 |
-
"policy": "greedy",
|
| 67 |
-
"n_episodes": 50,
|
| 68 |
-
"reward_mean": -1.7960883333333333,
|
| 69 |
-
"reward_std": 0.08206659628009437,
|
| 70 |
-
"reward_min": -1.9960833333333332,
|
| 71 |
-
"reward_max": -1.6348333333333334,
|
| 72 |
-
"length_mean": 44.76,
|
| 73 |
-
"violations_mean": 0.0,
|
| 74 |
-
"violations_max": 0
|
| 75 |
-
}
|
| 76 |
-
},
|
| 77 |
-
"hard_cascading_crisis": {
|
| 78 |
-
"ppo_v3": {
|
| 79 |
-
"policy": "ppo_v3",
|
| 80 |
-
"n_episodes": 50,
|
| 81 |
-
"reward_mean": 2.67403629887518,
|
| 82 |
-
"reward_std": 0.7949077297864112,
|
| 83 |
-
"reward_min": 0.44374348685637904,
|
| 84 |
-
"reward_max": 3.4482740553083278,
|
| 85 |
-
"length_mean": 56.06,
|
| 86 |
-
"violations_mean": 0.0,
|
| 87 |
-
"violations_max": 0,
|
| 88 |
-
"train_time_s": 1359.914410352707,
|
| 89 |
-
"total_timesteps": 100000
|
| 90 |
-
},
|
| 91 |
-
"random": {
|
| 92 |
-
"policy": "random",
|
| 93 |
-
"n_episodes": 50,
|
| 94 |
-
"reward_mean": -1.222005001736981,
|
| 95 |
-
"reward_std": 0.853497432761393,
|
| 96 |
-
"reward_min": -3.8651570083150526,
|
| 97 |
-
"reward_max": 0.6500552441714463,
|
| 98 |
-
"length_mean": 56.06,
|
| 99 |
-
"violations_mean": 0.0,
|
| 100 |
-
"violations_max": 0
|
| 101 |
-
},
|
| 102 |
-
"greedy": {
|
| 103 |
-
"policy": "greedy",
|
| 104 |
-
"n_episodes": 50,
|
| 105 |
-
"reward_mean": -1.4125516666666666,
|
| 106 |
-
"reward_std": 0.4515386177313937,
|
| 107 |
-
"reward_min": -2.3674999999999997,
|
| 108 |
-
"reward_max": -0.4405833333333334,
|
| 109 |
-
"length_mean": 56.06,
|
| 110 |
-
"violations_mean": 0.0,
|
| 111 |
-
"violations_max": 0
|
| 112 |
-
}
|
| 113 |
-
}
|
| 114 |
-
},
|
| 115 |
-
"baselines": {},
|
| 116 |
-
"config": {
|
| 117 |
-
"timesteps_per_task": 100000,
|
| 118 |
-
"eval_episodes": 50,
|
| 119 |
-
"seed": 42
|
| 120 |
-
},
|
| 121 |
-
"elapsed_min": 48.6515386501948
|
| 122 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"tasks": {
|
| 3 |
+
"easy_typhoon_response": {
|
| 4 |
+
"ppo_v3": {
|
| 5 |
+
"policy": "ppo_v3",
|
| 6 |
+
"n_episodes": 50,
|
| 7 |
+
"reward_mean": 1.2005000000000001,
|
| 8 |
+
"reward_std": 0.19939637032804786,
|
| 9 |
+
"reward_min": 0.643,
|
| 10 |
+
"reward_max": 1.3435000000000004,
|
| 11 |
+
"length_mean": 20.0,
|
| 12 |
+
"violations_mean": 0.0,
|
| 13 |
+
"violations_max": 0,
|
| 14 |
+
"train_time_s": 389.36543345451355,
|
| 15 |
+
"total_timesteps": 100000
|
| 16 |
+
},
|
| 17 |
+
"random": {
|
| 18 |
+
"policy": "random",
|
| 19 |
+
"n_episodes": 50,
|
| 20 |
+
"reward_mean": 0.7797316807490356,
|
| 21 |
+
"reward_std": 0.12419262667905032,
|
| 22 |
+
"reward_min": 0.5059697476286091,
|
| 23 |
+
"reward_max": 1.009169047501108,
|
| 24 |
+
"length_mean": 20.0,
|
| 25 |
+
"violations_mean": 0.0,
|
| 26 |
+
"violations_max": 0
|
| 27 |
+
},
|
| 28 |
+
"greedy": {
|
| 29 |
+
"policy": "greedy",
|
| 30 |
+
"n_episodes": 50,
|
| 31 |
+
"reward_mean": 0.9803400000000001,
|
| 32 |
+
"reward_std": 0.0062695215128429176,
|
| 33 |
+
"reward_min": 0.964,
|
| 34 |
+
"reward_max": 0.9894999999999999,
|
| 35 |
+
"length_mean": 20.0,
|
| 36 |
+
"violations_mean": 0.0,
|
| 37 |
+
"violations_max": 0
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
"medium_multi_front": {
|
| 41 |
+
"ppo_v3": {
|
| 42 |
+
"policy": "ppo_v3",
|
| 43 |
+
"n_episodes": 50,
|
| 44 |
+
"reward_mean": 2.774816094381805,
|
| 45 |
+
"reward_std": 0.2510891195507745,
|
| 46 |
+
"reward_min": 2.2131947145395343,
|
| 47 |
+
"reward_max": 3.1306422226861352,
|
| 48 |
+
"length_mean": 44.76,
|
| 49 |
+
"violations_mean": 0.0,
|
| 50 |
+
"violations_max": 0,
|
| 51 |
+
"train_time_s": 1028.4124627113342,
|
| 52 |
+
"total_timesteps": 100000
|
| 53 |
+
},
|
| 54 |
+
"random": {
|
| 55 |
+
"policy": "random",
|
| 56 |
+
"n_episodes": 50,
|
| 57 |
+
"reward_mean": -1.1101909893619986,
|
| 58 |
+
"reward_std": 0.8109045133638636,
|
| 59 |
+
"reward_min": -2.3839605638376136,
|
| 60 |
+
"reward_max": 0.6624458826285525,
|
| 61 |
+
"length_mean": 44.84,
|
| 62 |
+
"violations_mean": 0.0,
|
| 63 |
+
"violations_max": 0
|
| 64 |
+
},
|
| 65 |
+
"greedy": {
|
| 66 |
+
"policy": "greedy",
|
| 67 |
+
"n_episodes": 50,
|
| 68 |
+
"reward_mean": -1.7960883333333333,
|
| 69 |
+
"reward_std": 0.08206659628009437,
|
| 70 |
+
"reward_min": -1.9960833333333332,
|
| 71 |
+
"reward_max": -1.6348333333333334,
|
| 72 |
+
"length_mean": 44.76,
|
| 73 |
+
"violations_mean": 0.0,
|
| 74 |
+
"violations_max": 0
|
| 75 |
+
}
|
| 76 |
+
},
|
| 77 |
+
"hard_cascading_crisis": {
|
| 78 |
+
"ppo_v3": {
|
| 79 |
+
"policy": "ppo_v3",
|
| 80 |
+
"n_episodes": 50,
|
| 81 |
+
"reward_mean": 2.67403629887518,
|
| 82 |
+
"reward_std": 0.7949077297864112,
|
| 83 |
+
"reward_min": 0.44374348685637904,
|
| 84 |
+
"reward_max": 3.4482740553083278,
|
| 85 |
+
"length_mean": 56.06,
|
| 86 |
+
"violations_mean": 0.0,
|
| 87 |
+
"violations_max": 0,
|
| 88 |
+
"train_time_s": 1359.914410352707,
|
| 89 |
+
"total_timesteps": 100000
|
| 90 |
+
},
|
| 91 |
+
"random": {
|
| 92 |
+
"policy": "random",
|
| 93 |
+
"n_episodes": 50,
|
| 94 |
+
"reward_mean": -1.222005001736981,
|
| 95 |
+
"reward_std": 0.853497432761393,
|
| 96 |
+
"reward_min": -3.8651570083150526,
|
| 97 |
+
"reward_max": 0.6500552441714463,
|
| 98 |
+
"length_mean": 56.06,
|
| 99 |
+
"violations_mean": 0.0,
|
| 100 |
+
"violations_max": 0
|
| 101 |
+
},
|
| 102 |
+
"greedy": {
|
| 103 |
+
"policy": "greedy",
|
| 104 |
+
"n_episodes": 50,
|
| 105 |
+
"reward_mean": -1.4125516666666666,
|
| 106 |
+
"reward_std": 0.4515386177313937,
|
| 107 |
+
"reward_min": -2.3674999999999997,
|
| 108 |
+
"reward_max": -0.4405833333333334,
|
| 109 |
+
"length_mean": 56.06,
|
| 110 |
+
"violations_mean": 0.0,
|
| 111 |
+
"violations_max": 0
|
| 112 |
+
}
|
| 113 |
+
}
|
| 114 |
+
},
|
| 115 |
+
"baselines": {},
|
| 116 |
+
"config": {
|
| 117 |
+
"timesteps_per_task": 100000,
|
| 118 |
+
"eval_episodes": 50,
|
| 119 |
+
"seed": 42
|
| 120 |
+
},
|
| 121 |
+
"elapsed_min": 48.6515386501948
|
| 122 |
}
|
FINAL_SUBMIT/receipts/R6_GETHSEMANE_ONNX_EXPORT.json
CHANGED
|
@@ -1,25 +1,25 @@
|
|
| 1 |
-
{
|
| 2 |
-
"exports": [
|
| 3 |
-
{
|
| 4 |
-
"task": "easy_typhoon_response",
|
| 5 |
-
"onnx_path": "C:\\Users\\Dell\\Desktop\\Sleep-Token\\v3_arcadia\
|
| 6 |
-
"size_mb": 0.970768,
|
| 7 |
-
"verified": true,
|
| 8 |
-
"max_diff": 1.9073486328125e-06
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"task": "medium_multi_front",
|
| 12 |
-
"onnx_path": "C:\\Users\\Dell\\Desktop\\Sleep-Token\\v3_arcadia\
|
| 13 |
-
"size_mb": 0.970768,
|
| 14 |
-
"verified": true,
|
| 15 |
-
"max_diff": 1.9073486328125e-06
|
| 16 |
-
},
|
| 17 |
-
{
|
| 18 |
-
"task": "hard_cascading_crisis",
|
| 19 |
-
"onnx_path": "C:\\Users\\Dell\\Desktop\\Sleep-Token\\v3_arcadia\
|
| 20 |
-
"size_mb": 0.970768,
|
| 21 |
-
"verified": true,
|
| 22 |
-
"max_diff": 1.430511474609375e-06
|
| 23 |
-
}
|
| 24 |
-
]
|
| 25 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exports": [
|
| 3 |
+
{
|
| 4 |
+
"task": "easy_typhoon_response",
|
| 5 |
+
"onnx_path": "C:\\Users\\Dell\\Desktop\\Sleep-Token\\versions/v3_arcadia/\checkpoints\\gethsemane\\ppo_easy_typhoon_response.onnx",
|
| 6 |
+
"size_mb": 0.970768,
|
| 7 |
+
"verified": true,
|
| 8 |
+
"max_diff": 1.9073486328125e-06
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"task": "medium_multi_front",
|
| 12 |
+
"onnx_path": "C:\\Users\\Dell\\Desktop\\Sleep-Token\\versions/v3_arcadia/\checkpoints\\gethsemane\\ppo_medium_multi_front.onnx",
|
| 13 |
+
"size_mb": 0.970768,
|
| 14 |
+
"verified": true,
|
| 15 |
+
"max_diff": 1.9073486328125e-06
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"task": "hard_cascading_crisis",
|
| 19 |
+
"onnx_path": "C:\\Users\\Dell\\Desktop\\Sleep-Token\\versions/v3_arcadia/\checkpoints\\gethsemane\\ppo_hard_cascading_crisis.onnx",
|
| 20 |
+
"size_mb": 0.970768,
|
| 21 |
+
"verified": true,
|
| 22 |
+
"max_diff": 1.430511474609375e-06
|
| 23 |
+
}
|
| 24 |
+
]
|
| 25 |
}
|
FINAL_SUBMIT/receipts/R6_PROVIDER_V2.json
CHANGED
|
@@ -1,330 +1,330 @@
|
|
| 1 |
-
{
|
| 2 |
-
"task": "arrival_time_regression",
|
| 3 |
-
"task_description": "Predict expected disruption arrival time (continuous) per node, given noisy per-edge lead-times and random source nodes. Non-trivial: requires GNN to learn Dijkstra-like aggregation through the graph.",
|
| 4 |
-
"lead_time_noise_sigma_relative": 0.2,
|
| 5 |
-
"graphs": {
|
| 6 |
-
"easy": {
|
| 7 |
-
"n_nodes": 12,
|
| 8 |
-
"n_edges": 12,
|
| 9 |
-
"gnn_mae": 9.20589906692505,
|
| 10 |
-
"mlp_mae": 17.712093811035157,
|
| 11 |
-
"one_hop_mean_mae": 29.553308786787092,
|
| 12 |
-
"improvement_vs_mlp_pct": 48.0247837147887,
|
| 13 |
-
"improvement_vs_1hop_pct": 68.84985321494395,
|
| 14 |
-
"gnn_loss_curve": [
|
| 15 |
-
983.6469454498291,
|
| 16 |
-
694.3125346450805,
|
| 17 |
-
594.0063958816528,
|
| 18 |
-
548.9563833961487,
|
| 19 |
-
495.32008571624755,
|
| 20 |
-
420.9683524398804,
|
| 21 |
-
364.7742200584412,
|
| 22 |
-
329.68193370532987,
|
| 23 |
-
308.9609826283455,
|
| 24 |
-
305.6601629691124,
|
| 25 |
-
298.6861881341934,
|
| 26 |
-
287.8384048962593,
|
| 27 |
-
303.22127193498613,
|
| 28 |
-
291.6199851961136,
|
| 29 |
-
292.3526881427765,
|
| 30 |
-
286.59378911590574,
|
| 31 |
-
297.95547390937804,
|
| 32 |
-
277.4495716457367,
|
| 33 |
-
278.5004913520813,
|
| 34 |
-
273.5950565481186,
|
| 35 |
-
280.847659828186,
|
| 36 |
-
269.8950548853874,
|
| 37 |
-
268.0327960948944,
|
| 38 |
-
272.2881185493469,
|
| 39 |
-
271.73518936920163,
|
| 40 |
-
266.2893534479141,
|
| 41 |
-
268.7633232383728,
|
| 42 |
-
263.14099113464357,
|
| 43 |
-
261.69743074321747,
|
| 44 |
-
262.2134785709381
|
| 45 |
-
],
|
| 46 |
-
"gnn_test_mae_curve": [
|
| 47 |
-
15.625262084007263,
|
| 48 |
-
17.273250563144686,
|
| 49 |
-
15.69198014497757,
|
| 50 |
-
15.216868221759796,
|
| 51 |
-
13.83246925830841,
|
| 52 |
-
12.072544195652007,
|
| 53 |
-
12.047622272968292,
|
| 54 |
-
10.346303402781487,
|
| 55 |
-
10.991831306219101,
|
| 56 |
-
9.730522887706757,
|
| 57 |
-
9.387227172255516,
|
| 58 |
-
12.727755947113037,
|
| 59 |
-
10.449746668934822,
|
| 60 |
-
10.917218554019929,
|
| 61 |
-
9.83320654630661,
|
| 62 |
-
11.56927591919899,
|
| 63 |
-
9.640368175506591,
|
| 64 |
-
9.518106588125228,
|
| 65 |
-
9.238331428766251,
|
| 66 |
-
10.004606694579124,
|
| 67 |
-
9.601016719341278,
|
| 68 |
-
10.924803348779678,
|
| 69 |
-
9.062952963709831,
|
| 70 |
-
11.125388493537903,
|
| 71 |
-
8.51151149213314,
|
| 72 |
-
8.760705815553665,
|
| 73 |
-
8.83567961215973,
|
| 74 |
-
8.716645919680595,
|
| 75 |
-
9.704761312007903,
|
| 76 |
-
9.20589906692505
|
| 77 |
-
],
|
| 78 |
-
"mlp_test_mae_curve": [
|
| 79 |
-
16.517573373317717,
|
| 80 |
-
17.61745592355728,
|
| 81 |
-
17.478831689357758,
|
| 82 |
-
17.963374128341673,
|
| 83 |
-
17.317361807823183,
|
| 84 |
-
17.35558673620224,
|
| 85 |
-
19.272147517204285,
|
| 86 |
-
17.29823645591736,
|
| 87 |
-
18.360565376281738,
|
| 88 |
-
16.33169244527817,
|
| 89 |
-
16.291482293605803,
|
| 90 |
-
20.00996126651764,
|
| 91 |
-
17.24092205762863,
|
| 92 |
-
17.935992388725282,
|
| 93 |
-
18.476314017772676,
|
| 94 |
-
20.500635390281676,
|
| 95 |
-
17.64075089454651,
|
| 96 |
-
19.23261556148529,
|
| 97 |
-
17.159917891025543,
|
| 98 |
-
18.033056726455687,
|
| 99 |
-
17.04588686466217,
|
| 100 |
-
17.51567750453949,
|
| 101 |
-
16.925300316810606,
|
| 102 |
-
19.993932852745058,
|
| 103 |
-
17.863101620674133,
|
| 104 |
-
17.46893537759781,
|
| 105 |
-
17.768136410713197,
|
| 106 |
-
17.399936029911043,
|
| 107 |
-
17.271209075450898,
|
| 108 |
-
17.712093811035157
|
| 109 |
-
]
|
| 110 |
-
},
|
| 111 |
-
"medium": {
|
| 112 |
-
"n_nodes": 25,
|
| 113 |
-
"n_edges": 29,
|
| 114 |
-
"gnn_mae": 14.05237404346466,
|
| 115 |
-
"mlp_mae": 27.562243633270263,
|
| 116 |
-
"one_hop_mean_mae": 23.25141793220304,
|
| 117 |
-
"improvement_vs_mlp_pct": 49.01585578286486,
|
| 118 |
-
"improvement_vs_1hop_pct": 39.56336734198809,
|
| 119 |
-
"gnn_loss_curve": [
|
| 120 |
-
1455.8575012207032,
|
| 121 |
-
1070.794164489746,
|
| 122 |
-
978.3833621215821,
|
| 123 |
-
878.4453280944824,
|
| 124 |
-
759.8914498443603,
|
| 125 |
-
676.4201901473999,
|
| 126 |
-
592.9840587463378,
|
| 127 |
-
593.9022348022461,
|
| 128 |
-
580.474338684082,
|
| 129 |
-
548.8776502380371,
|
| 130 |
-
535.7356602172852,
|
| 131 |
-
524.7076401443481,
|
| 132 |
-
517.5761855316163,
|
| 133 |
-
503.14428115844726,
|
| 134 |
-
504.31373574829104,
|
| 135 |
-
482.12416637420654,
|
| 136 |
-
491.71681065368654,
|
| 137 |
-
476.0351883163452,
|
| 138 |
-
475.84812075042726,
|
| 139 |
-
469.6501838378906,
|
| 140 |
-
473.09340254211423,
|
| 141 |
-
468.5468386917114,
|
| 142 |
-
457.8393885040283,
|
| 143 |
-
461.61461613464354,
|
| 144 |
-
450.00589713287354,
|
| 145 |
-
444.84376406097414,
|
| 146 |
-
448.23634549713137,
|
| 147 |
-
441.89026587677,
|
| 148 |
-
436.69793469238283,
|
| 149 |
-
434.4493161087036
|
| 150 |
-
],
|
| 151 |
-
"gnn_test_mae_curve": [
|
| 152 |
-
26.63341254234314,
|
| 153 |
-
23.634564056396485,
|
| 154 |
-
23.186181049346924,
|
| 155 |
-
21.077601199150084,
|
| 156 |
-
21.637806577682497,
|
| 157 |
-
17.98971748828888,
|
| 158 |
-
16.306520526409148,
|
| 159 |
-
17.966433074474335,
|
| 160 |
-
17.40695864200592,
|
| 161 |
-
15.116412845849991,
|
| 162 |
-
15.247849924564362,
|
| 163 |
-
14.415206160545349,
|
| 164 |
-
15.09439873456955,
|
| 165 |
-
14.077203586101533,
|
| 166 |
-
16.387850997447966,
|
| 167 |
-
16.519536385536195,
|
| 168 |
-
15.912737758159638,
|
| 169 |
-
15.685167801380157,
|
| 170 |
-
15.163068435192109,
|
| 171 |
-
15.200627043247223,
|
| 172 |
-
15.001122550964356,
|
| 173 |
-
14.351007792949677,
|
| 174 |
-
15.44103235244751,
|
| 175 |
-
13.403649566173554,
|
| 176 |
-
17.10527836084366,
|
| 177 |
-
14.323340699672698,
|
| 178 |
-
14.384661407470704,
|
| 179 |
-
14.556273880004882,
|
| 180 |
-
13.85397144317627,
|
| 181 |
-
14.05237404346466
|
| 182 |
-
],
|
| 183 |
-
"mlp_test_mae_curve": [
|
| 184 |
-
27.1725799369812,
|
| 185 |
-
26.40243914604187,
|
| 186 |
-
27.289838228225708,
|
| 187 |
-
26.334666624069214,
|
| 188 |
-
28.48377342224121,
|
| 189 |
-
26.199828100204467,
|
| 190 |
-
29.151524686813353,
|
| 191 |
-
28.400241794586183,
|
| 192 |
-
26.501172218322754,
|
| 193 |
-
27.04287679672241,
|
| 194 |
-
27.969863624572753,
|
| 195 |
-
26.34369418144226,
|
| 196 |
-
28.614215364456175,
|
| 197 |
-
26.348094720840454,
|
| 198 |
-
27.199346466064455,
|
| 199 |
-
26.72101284980774,
|
| 200 |
-
26.492710275650026,
|
| 201 |
-
28.792157373428346,
|
| 202 |
-
25.963287801742553,
|
| 203 |
-
27.035139274597167,
|
| 204 |
-
26.07756766319275,
|
| 205 |
-
27.420557165145873,
|
| 206 |
-
28.615666379928587,
|
| 207 |
-
26.438606796264647,
|
| 208 |
-
26.199908666610717,
|
| 209 |
-
26.585446147918702,
|
| 210 |
-
26.246847848892212,
|
| 211 |
-
26.238035287857056,
|
| 212 |
-
26.170038957595825,
|
| 213 |
-
27.562243633270263
|
| 214 |
-
]
|
| 215 |
-
},
|
| 216 |
-
"hard": {
|
| 217 |
-
"n_nodes": 40,
|
| 218 |
-
"n_edges": 47,
|
| 219 |
-
"gnn_mae": 10.347342171669005,
|
| 220 |
-
"mlp_mae": 28.483039016723634,
|
| 221 |
-
"one_hop_mean_mae": 16.03428017649916,
|
| 222 |
-
"improvement_vs_mlp_pct": 63.67191659010252,
|
| 223 |
-
"improvement_vs_1hop_pct": 35.46737329166347,
|
| 224 |
-
"gnn_loss_curve": [
|
| 225 |
-
1519.987557739258,
|
| 226 |
-
1021.7450046386718,
|
| 227 |
-
815.2417454833984,
|
| 228 |
-
709.5358395690918,
|
| 229 |
-
634.4188123474121,
|
| 230 |
-
560.8865319213867,
|
| 231 |
-
506.78174713134763,
|
| 232 |
-
475.7871089630127,
|
| 233 |
-
451.54362382507327,
|
| 234 |
-
442.535458694458,
|
| 235 |
-
425.76794429016115,
|
| 236 |
-
416.6028264923096,
|
| 237 |
-
416.2537903900147,
|
| 238 |
-
416.3216004333496,
|
| 239 |
-
405.91741243743894,
|
| 240 |
-
401.3154751739502,
|
| 241 |
-
403.56236766052245,
|
| 242 |
-
399.83712251281736,
|
| 243 |
-
397.13397619628904,
|
| 244 |
-
396.69007269287107,
|
| 245 |
-
389.8687892990112,
|
| 246 |
-
386.671229675293,
|
| 247 |
-
390.19565746307376,
|
| 248 |
-
387.47164192962646,
|
| 249 |
-
384.5350112533569,
|
| 250 |
-
385.34569120025634,
|
| 251 |
-
381.3625469284058,
|
| 252 |
-
380.5953342590332,
|
| 253 |
-
376.2190606918335,
|
| 254 |
-
378.44821893310547
|
| 255 |
-
],
|
| 256 |
-
"gnn_test_mae_curve": [
|
| 257 |
-
25.89111141204834,
|
| 258 |
-
22.817488927841186,
|
| 259 |
-
19.102868838310243,
|
| 260 |
-
21.260897178649902,
|
| 261 |
-
16.00875702381134,
|
| 262 |
-
15.999692721366882,
|
| 263 |
-
14.555557656288148,
|
| 264 |
-
13.622318716049195,
|
| 265 |
-
13.0450461602211,
|
| 266 |
-
13.296297969818115,
|
| 267 |
-
12.376682465076447,
|
| 268 |
-
13.256674709320068,
|
| 269 |
-
11.923482534885407,
|
| 270 |
-
11.381103422641754,
|
| 271 |
-
13.629612107276916,
|
| 272 |
-
13.775573563575744,
|
| 273 |
-
12.455035951137543,
|
| 274 |
-
13.674895765781402,
|
| 275 |
-
12.645530993938445,
|
| 276 |
-
12.839997906684875,
|
| 277 |
-
12.782445096969605,
|
| 278 |
-
11.498445341587066,
|
| 279 |
-
12.44089034318924,
|
| 280 |
-
10.853419225215912,
|
| 281 |
-
11.889822478294372,
|
| 282 |
-
11.540131111145019,
|
| 283 |
-
12.30764417886734,
|
| 284 |
-
10.73738386631012,
|
| 285 |
-
10.981562974452972,
|
| 286 |
-
10.347342171669005
|
| 287 |
-
],
|
| 288 |
-
"mlp_test_mae_curve": [
|
| 289 |
-
28.691825714111328,
|
| 290 |
-
29.088216686248778,
|
| 291 |
-
27.926491804122925,
|
| 292 |
-
32.548833179473874,
|
| 293 |
-
28.55751530647278,
|
| 294 |
-
27.89367533683777,
|
| 295 |
-
28.729960765838623,
|
| 296 |
-
29.485910148620604,
|
| 297 |
-
28.418713645935057,
|
| 298 |
-
29.061994075775146,
|
| 299 |
-
27.86555823326111,
|
| 300 |
-
27.882053699493408,
|
| 301 |
-
28.62539842605591,
|
| 302 |
-
28.374376544952394,
|
| 303 |
-
27.627659730911255,
|
| 304 |
-
29.199770755767823,
|
| 305 |
-
26.9179744720459,
|
| 306 |
-
29.280858907699585,
|
| 307 |
-
28.915042276382447,
|
| 308 |
-
28.664446725845337,
|
| 309 |
-
28.888797369003296,
|
| 310 |
-
29.49649586677551,
|
| 311 |
-
29.45292121887207,
|
| 312 |
-
28.840624055862428,
|
| 313 |
-
27.16323224067688,
|
| 314 |
-
27.801621007919312,
|
| 315 |
-
28.310747117996215,
|
| 316 |
-
28.82351138114929,
|
| 317 |
-
30.00698434829712,
|
| 318 |
-
28.483039016723634
|
| 319 |
-
]
|
| 320 |
-
}
|
| 321 |
-
},
|
| 322 |
-
"config": {
|
| 323 |
-
"n_train": 500,
|
| 324 |
-
"n_test": 200,
|
| 325 |
-
"hidden": 64,
|
| 326 |
-
"epochs": 30,
|
| 327 |
-
"lr": 0.003
|
| 328 |
-
},
|
| 329 |
-
"elapsed_min": 4.006023410956065
|
| 330 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "arrival_time_regression",
|
| 3 |
+
"task_description": "Predict expected disruption arrival time (continuous) per node, given noisy per-edge lead-times and random source nodes. Non-trivial: requires GNN to learn Dijkstra-like aggregation through the graph.",
|
| 4 |
+
"lead_time_noise_sigma_relative": 0.2,
|
| 5 |
+
"graphs": {
|
| 6 |
+
"easy": {
|
| 7 |
+
"n_nodes": 12,
|
| 8 |
+
"n_edges": 12,
|
| 9 |
+
"gnn_mae": 9.20589906692505,
|
| 10 |
+
"mlp_mae": 17.712093811035157,
|
| 11 |
+
"one_hop_mean_mae": 29.553308786787092,
|
| 12 |
+
"improvement_vs_mlp_pct": 48.0247837147887,
|
| 13 |
+
"improvement_vs_1hop_pct": 68.84985321494395,
|
| 14 |
+
"gnn_loss_curve": [
|
| 15 |
+
983.6469454498291,
|
| 16 |
+
694.3125346450805,
|
| 17 |
+
594.0063958816528,
|
| 18 |
+
548.9563833961487,
|
| 19 |
+
495.32008571624755,
|
| 20 |
+
420.9683524398804,
|
| 21 |
+
364.7742200584412,
|
| 22 |
+
329.68193370532987,
|
| 23 |
+
308.9609826283455,
|
| 24 |
+
305.6601629691124,
|
| 25 |
+
298.6861881341934,
|
| 26 |
+
287.8384048962593,
|
| 27 |
+
303.22127193498613,
|
| 28 |
+
291.6199851961136,
|
| 29 |
+
292.3526881427765,
|
| 30 |
+
286.59378911590574,
|
| 31 |
+
297.95547390937804,
|
| 32 |
+
277.4495716457367,
|
| 33 |
+
278.5004913520813,
|
| 34 |
+
273.5950565481186,
|
| 35 |
+
280.847659828186,
|
| 36 |
+
269.8950548853874,
|
| 37 |
+
268.0327960948944,
|
| 38 |
+
272.2881185493469,
|
| 39 |
+
271.73518936920163,
|
| 40 |
+
266.2893534479141,
|
| 41 |
+
268.7633232383728,
|
| 42 |
+
263.14099113464357,
|
| 43 |
+
261.69743074321747,
|
| 44 |
+
262.2134785709381
|
| 45 |
+
],
|
| 46 |
+
"gnn_test_mae_curve": [
|
| 47 |
+
15.625262084007263,
|
| 48 |
+
17.273250563144686,
|
| 49 |
+
15.69198014497757,
|
| 50 |
+
15.216868221759796,
|
| 51 |
+
13.83246925830841,
|
| 52 |
+
12.072544195652007,
|
| 53 |
+
12.047622272968292,
|
| 54 |
+
10.346303402781487,
|
| 55 |
+
10.991831306219101,
|
| 56 |
+
9.730522887706757,
|
| 57 |
+
9.387227172255516,
|
| 58 |
+
12.727755947113037,
|
| 59 |
+
10.449746668934822,
|
| 60 |
+
10.917218554019929,
|
| 61 |
+
9.83320654630661,
|
| 62 |
+
11.56927591919899,
|
| 63 |
+
9.640368175506591,
|
| 64 |
+
9.518106588125228,
|
| 65 |
+
9.238331428766251,
|
| 66 |
+
10.004606694579124,
|
| 67 |
+
9.601016719341278,
|
| 68 |
+
10.924803348779678,
|
| 69 |
+
9.062952963709831,
|
| 70 |
+
11.125388493537903,
|
| 71 |
+
8.51151149213314,
|
| 72 |
+
8.760705815553665,
|
| 73 |
+
8.83567961215973,
|
| 74 |
+
8.716645919680595,
|
| 75 |
+
9.704761312007903,
|
| 76 |
+
9.20589906692505
|
| 77 |
+
],
|
| 78 |
+
"mlp_test_mae_curve": [
|
| 79 |
+
16.517573373317717,
|
| 80 |
+
17.61745592355728,
|
| 81 |
+
17.478831689357758,
|
| 82 |
+
17.963374128341673,
|
| 83 |
+
17.317361807823183,
|
| 84 |
+
17.35558673620224,
|
| 85 |
+
19.272147517204285,
|
| 86 |
+
17.29823645591736,
|
| 87 |
+
18.360565376281738,
|
| 88 |
+
16.33169244527817,
|
| 89 |
+
16.291482293605803,
|
| 90 |
+
20.00996126651764,
|
| 91 |
+
17.24092205762863,
|
| 92 |
+
17.935992388725282,
|
| 93 |
+
18.476314017772676,
|
| 94 |
+
20.500635390281676,
|
| 95 |
+
17.64075089454651,
|
| 96 |
+
19.23261556148529,
|
| 97 |
+
17.159917891025543,
|
| 98 |
+
18.033056726455687,
|
| 99 |
+
17.04588686466217,
|
| 100 |
+
17.51567750453949,
|
| 101 |
+
16.925300316810606,
|
| 102 |
+
19.993932852745058,
|
| 103 |
+
17.863101620674133,
|
| 104 |
+
17.46893537759781,
|
| 105 |
+
17.768136410713197,
|
| 106 |
+
17.399936029911043,
|
| 107 |
+
17.271209075450898,
|
| 108 |
+
17.712093811035157
|
| 109 |
+
]
|
| 110 |
+
},
|
| 111 |
+
"medium": {
|
| 112 |
+
"n_nodes": 25,
|
| 113 |
+
"n_edges": 29,
|
| 114 |
+
"gnn_mae": 14.05237404346466,
|
| 115 |
+
"mlp_mae": 27.562243633270263,
|
| 116 |
+
"one_hop_mean_mae": 23.25141793220304,
|
| 117 |
+
"improvement_vs_mlp_pct": 49.01585578286486,
|
| 118 |
+
"improvement_vs_1hop_pct": 39.56336734198809,
|
| 119 |
+
"gnn_loss_curve": [
|
| 120 |
+
1455.8575012207032,
|
| 121 |
+
1070.794164489746,
|
| 122 |
+
978.3833621215821,
|
| 123 |
+
878.4453280944824,
|
| 124 |
+
759.8914498443603,
|
| 125 |
+
676.4201901473999,
|
| 126 |
+
592.9840587463378,
|
| 127 |
+
593.9022348022461,
|
| 128 |
+
580.474338684082,
|
| 129 |
+
548.8776502380371,
|
| 130 |
+
535.7356602172852,
|
| 131 |
+
524.7076401443481,
|
| 132 |
+
517.5761855316163,
|
| 133 |
+
503.14428115844726,
|
| 134 |
+
504.31373574829104,
|
| 135 |
+
482.12416637420654,
|
| 136 |
+
491.71681065368654,
|
| 137 |
+
476.0351883163452,
|
| 138 |
+
475.84812075042726,
|
| 139 |
+
469.6501838378906,
|
| 140 |
+
473.09340254211423,
|
| 141 |
+
468.5468386917114,
|
| 142 |
+
457.8393885040283,
|
| 143 |
+
461.61461613464354,
|
| 144 |
+
450.00589713287354,
|
| 145 |
+
444.84376406097414,
|
| 146 |
+
448.23634549713137,
|
| 147 |
+
441.89026587677,
|
| 148 |
+
436.69793469238283,
|
| 149 |
+
434.4493161087036
|
| 150 |
+
],
|
| 151 |
+
"gnn_test_mae_curve": [
|
| 152 |
+
26.63341254234314,
|
| 153 |
+
23.634564056396485,
|
| 154 |
+
23.186181049346924,
|
| 155 |
+
21.077601199150084,
|
| 156 |
+
21.637806577682497,
|
| 157 |
+
17.98971748828888,
|
| 158 |
+
16.306520526409148,
|
| 159 |
+
17.966433074474335,
|
| 160 |
+
17.40695864200592,
|
| 161 |
+
15.116412845849991,
|
| 162 |
+
15.247849924564362,
|
| 163 |
+
14.415206160545349,
|
| 164 |
+
15.09439873456955,
|
| 165 |
+
14.077203586101533,
|
| 166 |
+
16.387850997447966,
|
| 167 |
+
16.519536385536195,
|
| 168 |
+
15.912737758159638,
|
| 169 |
+
15.685167801380157,
|
| 170 |
+
15.163068435192109,
|
| 171 |
+
15.200627043247223,
|
| 172 |
+
15.001122550964356,
|
| 173 |
+
14.351007792949677,
|
| 174 |
+
15.44103235244751,
|
| 175 |
+
13.403649566173554,
|
| 176 |
+
17.10527836084366,
|
| 177 |
+
14.323340699672698,
|
| 178 |
+
14.384661407470704,
|
| 179 |
+
14.556273880004882,
|
| 180 |
+
13.85397144317627,
|
| 181 |
+
14.05237404346466
|
| 182 |
+
],
|
| 183 |
+
"mlp_test_mae_curve": [
|
| 184 |
+
27.1725799369812,
|
| 185 |
+
26.40243914604187,
|
| 186 |
+
27.289838228225708,
|
| 187 |
+
26.334666624069214,
|
| 188 |
+
28.48377342224121,
|
| 189 |
+
26.199828100204467,
|
| 190 |
+
29.151524686813353,
|
| 191 |
+
28.400241794586183,
|
| 192 |
+
26.501172218322754,
|
| 193 |
+
27.04287679672241,
|
| 194 |
+
27.969863624572753,
|
| 195 |
+
26.34369418144226,
|
| 196 |
+
28.614215364456175,
|
| 197 |
+
26.348094720840454,
|
| 198 |
+
27.199346466064455,
|
| 199 |
+
26.72101284980774,
|
| 200 |
+
26.492710275650026,
|
| 201 |
+
28.792157373428346,
|
| 202 |
+
25.963287801742553,
|
| 203 |
+
27.035139274597167,
|
| 204 |
+
26.07756766319275,
|
| 205 |
+
27.420557165145873,
|
| 206 |
+
28.615666379928587,
|
| 207 |
+
26.438606796264647,
|
| 208 |
+
26.199908666610717,
|
| 209 |
+
26.585446147918702,
|
| 210 |
+
26.246847848892212,
|
| 211 |
+
26.238035287857056,
|
| 212 |
+
26.170038957595825,
|
| 213 |
+
27.562243633270263
|
| 214 |
+
]
|
| 215 |
+
},
|
| 216 |
+
"hard": {
|
| 217 |
+
"n_nodes": 40,
|
| 218 |
+
"n_edges": 47,
|
| 219 |
+
"gnn_mae": 10.347342171669005,
|
| 220 |
+
"mlp_mae": 28.483039016723634,
|
| 221 |
+
"one_hop_mean_mae": 16.03428017649916,
|
| 222 |
+
"improvement_vs_mlp_pct": 63.67191659010252,
|
| 223 |
+
"improvement_vs_1hop_pct": 35.46737329166347,
|
| 224 |
+
"gnn_loss_curve": [
|
| 225 |
+
1519.987557739258,
|
| 226 |
+
1021.7450046386718,
|
| 227 |
+
815.2417454833984,
|
| 228 |
+
709.5358395690918,
|
| 229 |
+
634.4188123474121,
|
| 230 |
+
560.8865319213867,
|
| 231 |
+
506.78174713134763,
|
| 232 |
+
475.7871089630127,
|
| 233 |
+
451.54362382507327,
|
| 234 |
+
442.535458694458,
|
| 235 |
+
425.76794429016115,
|
| 236 |
+
416.6028264923096,
|
| 237 |
+
416.2537903900147,
|
| 238 |
+
416.3216004333496,
|
| 239 |
+
405.91741243743894,
|
| 240 |
+
401.3154751739502,
|
| 241 |
+
403.56236766052245,
|
| 242 |
+
399.83712251281736,
|
| 243 |
+
397.13397619628904,
|
| 244 |
+
396.69007269287107,
|
| 245 |
+
389.8687892990112,
|
| 246 |
+
386.671229675293,
|
| 247 |
+
390.19565746307376,
|
| 248 |
+
387.47164192962646,
|
| 249 |
+
384.5350112533569,
|
| 250 |
+
385.34569120025634,
|
| 251 |
+
381.3625469284058,
|
| 252 |
+
380.5953342590332,
|
| 253 |
+
376.2190606918335,
|
| 254 |
+
378.44821893310547
|
| 255 |
+
],
|
| 256 |
+
"gnn_test_mae_curve": [
|
| 257 |
+
25.89111141204834,
|
| 258 |
+
22.817488927841186,
|
| 259 |
+
19.102868838310243,
|
| 260 |
+
21.260897178649902,
|
| 261 |
+
16.00875702381134,
|
| 262 |
+
15.999692721366882,
|
| 263 |
+
14.555557656288148,
|
| 264 |
+
13.622318716049195,
|
| 265 |
+
13.0450461602211,
|
| 266 |
+
13.296297969818115,
|
| 267 |
+
12.376682465076447,
|
| 268 |
+
13.256674709320068,
|
| 269 |
+
11.923482534885407,
|
| 270 |
+
11.381103422641754,
|
| 271 |
+
13.629612107276916,
|
| 272 |
+
13.775573563575744,
|
| 273 |
+
12.455035951137543,
|
| 274 |
+
13.674895765781402,
|
| 275 |
+
12.645530993938445,
|
| 276 |
+
12.839997906684875,
|
| 277 |
+
12.782445096969605,
|
| 278 |
+
11.498445341587066,
|
| 279 |
+
12.44089034318924,
|
| 280 |
+
10.853419225215912,
|
| 281 |
+
11.889822478294372,
|
| 282 |
+
11.540131111145019,
|
| 283 |
+
12.30764417886734,
|
| 284 |
+
10.73738386631012,
|
| 285 |
+
10.981562974452972,
|
| 286 |
+
10.347342171669005
|
| 287 |
+
],
|
| 288 |
+
"mlp_test_mae_curve": [
|
| 289 |
+
28.691825714111328,
|
| 290 |
+
29.088216686248778,
|
| 291 |
+
27.926491804122925,
|
| 292 |
+
32.548833179473874,
|
| 293 |
+
28.55751530647278,
|
| 294 |
+
27.89367533683777,
|
| 295 |
+
28.729960765838623,
|
| 296 |
+
29.485910148620604,
|
| 297 |
+
28.418713645935057,
|
| 298 |
+
29.061994075775146,
|
| 299 |
+
27.86555823326111,
|
| 300 |
+
27.882053699493408,
|
| 301 |
+
28.62539842605591,
|
| 302 |
+
28.374376544952394,
|
| 303 |
+
27.627659730911255,
|
| 304 |
+
29.199770755767823,
|
| 305 |
+
26.9179744720459,
|
| 306 |
+
29.280858907699585,
|
| 307 |
+
28.915042276382447,
|
| 308 |
+
28.664446725845337,
|
| 309 |
+
28.888797369003296,
|
| 310 |
+
29.49649586677551,
|
| 311 |
+
29.45292121887207,
|
| 312 |
+
28.840624055862428,
|
| 313 |
+
27.16323224067688,
|
| 314 |
+
27.801621007919312,
|
| 315 |
+
28.310747117996215,
|
| 316 |
+
28.82351138114929,
|
| 317 |
+
30.00698434829712,
|
| 318 |
+
28.483039016723634
|
| 319 |
+
]
|
| 320 |
+
}
|
| 321 |
+
},
|
| 322 |
+
"config": {
|
| 323 |
+
"n_train": 500,
|
| 324 |
+
"n_test": 200,
|
| 325 |
+
"hidden": 64,
|
| 326 |
+
"epochs": 30,
|
| 327 |
+
"lr": 0.003
|
| 328 |
+
},
|
| 329 |
+
"elapsed_min": 4.006023410956065
|
| 330 |
}
|
FINAL_SUBMIT/receipts/R6_PROVIDER_v1_F1.json
CHANGED
|
@@ -1,1756 +1,1756 @@
|
|
| 1 |
-
{
|
| 2 |
-
"graphs": {
|
| 3 |
-
"easy": {
|
| 4 |
-
"n_nodes": 12,
|
| 5 |
-
"n_edges": 10,
|
| 6 |
-
"gnn_final": {
|
| 7 |
-
"acc": 1.0,
|
| 8 |
-
"precision": 1.0,
|
| 9 |
-
"recall": 1.0,
|
| 10 |
-
"f1": 1.0
|
| 11 |
-
},
|
| 12 |
-
"baseline_direct_neighbors": {
|
| 13 |
-
"acc": 0.8258333333333333,
|
| 14 |
-
"precision": 1.0,
|
| 15 |
-
"recall": 0.6352530541012217,
|
| 16 |
-
"f1": 0.7769477054429028
|
| 17 |
-
},
|
| 18 |
-
"improvement_f1_pp": 22.305229455709718,
|
| 19 |
-
"train_loss_curve": [
|
| 20 |
-
0.10601958807871187,
|
| 21 |
-
0.00014574478766241308,
|
| 22 |
-
2.1336230871288145e-05,
|
| 23 |
-
5.904760447787133e-06,
|
| 24 |
-
0.014828034023753519,
|
| 25 |
-
0.0001365676538936252,
|
| 26 |
-
2.800940909035432e-05,
|
| 27 |
-
7.873948834791846e-06,
|
| 28 |
-
2.40824965675521e-06,
|
| 29 |
-
7.439197035413468e-07,
|
| 30 |
-
2.349434055591839e-07,
|
| 31 |
-
8.035365056026132e-08,
|
| 32 |
-
1.866763376779131e-08,
|
| 33 |
-
6.7128299592450774e-09,
|
| 34 |
-
3.606812599319898e-09,
|
| 35 |
-
2.4320182903440704e-09,
|
| 36 |
-
1.5445408799196548e-09,
|
| 37 |
-
0.03198392186360504,
|
| 38 |
-
1.3277981027858794e-05,
|
| 39 |
-
7.040849976128097e-06,
|
| 40 |
-
2.0380432214083175e-06,
|
| 41 |
-
5.154616233541851e-07,
|
| 42 |
-
0.017213296287886225,
|
| 43 |
-
0.00023569030925164338,
|
| 44 |
-
2.4805963813645227e-05,
|
| 45 |
-
6.058055528068272e-06,
|
| 46 |
-
1.8203820033098038e-06,
|
| 47 |
-
6.043328515907098e-07,
|
| 48 |
-
2.1225388103874568e-07,
|
| 49 |
-
7.437462508802039e-08,
|
| 50 |
-
1.902343076246039e-08,
|
| 51 |
-
6.527784956639485e-09,
|
| 52 |
-
3.3294667175720776e-09,
|
| 53 |
-
1.9615958442567566e-09,
|
| 54 |
-
0.010902570914775889,
|
| 55 |
-
2.806348171776314e-05,
|
| 56 |
-
7.667120790626038e-06,
|
| 57 |
-
2.582107717285551e-06,
|
| 58 |
-
9.129105348027232e-07,
|
| 59 |
-
3.106581481139294e-07,
|
| 60 |
-
1.0230859844032431e-07,
|
| 61 |
-
2.725160428237702e-08,
|
| 62 |
-
8.880124408068363e-09,
|
| 63 |
-
4.4200613740675046e-09,
|
| 64 |
-
2.8600379247657045e-09,
|
| 65 |
-
2.2151315261330923e-09,
|
| 66 |
-
1.7114610773887693e-09,
|
| 67 |
-
1.4000422095074408e-09,
|
| 68 |
-
1.0463116296276038e-09,
|
| 69 |
-
6.4079628731738e-10,
|
| 70 |
-
0.02516633728286725,
|
| 71 |
-
0.00012813284900565014,
|
| 72 |
-
2.3232634050379803e-05,
|
| 73 |
-
7.066120872802589e-06,
|
| 74 |
-
2.311430617913936e-06,
|
| 75 |
-
7.920952698295068e-07,
|
| 76 |
-
2.5278086959691613e-07,
|
| 77 |
-
7.818242851037627e-08,
|
| 78 |
-
1.983640248580842e-08,
|
| 79 |
-
7.863145182916767e-09,
|
| 80 |
-
5.0701508055233275e-09,
|
| 81 |
-
4.364776342121379e-09,
|
| 82 |
-
3.937454630286758e-09,
|
| 83 |
-
2.518706138457294e-09,
|
| 84 |
-
1.9815549914984234e-09,
|
| 85 |
-
0.018349960519401222,
|
| 86 |
-
7.85511791638533e-05,
|
| 87 |
-
2.0063992723006376e-05,
|
| 88 |
-
6.210748974664104e-06,
|
| 89 |
-
1.9043317207399904e-06,
|
| 90 |
-
6.112533347568437e-07,
|
| 91 |
-
2.0612900407184615e-07,
|
| 92 |
-
6.247272126631417e-08,
|
| 93 |
-
1.5818333928198573e-08,
|
| 94 |
-
5.678499110562204e-09,
|
| 95 |
-
2.927658185385007e-09,
|
| 96 |
-
2.2895658619235268e-09,
|
| 97 |
-
1.9812523096841366e-09,
|
| 98 |
-
1.418338779821114e-09,
|
| 99 |
-
9.94527561841937e-10
|
| 100 |
-
],
|
| 101 |
-
"test_metric_curve": [
|
| 102 |
-
{
|
| 103 |
-
"acc": 1.0,
|
| 104 |
-
"precision": 1.0,
|
| 105 |
-
"recall": 1.0,
|
| 106 |
-
"f1": 1.0
|
| 107 |
-
},
|
| 108 |
-
{
|
| 109 |
-
"acc": 1.0,
|
| 110 |
-
"precision": 1.0,
|
| 111 |
-
"recall": 1.0,
|
| 112 |
-
"f1": 1.0
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"acc": 1.0,
|
| 116 |
-
"precision": 1.0,
|
| 117 |
-
"recall": 1.0,
|
| 118 |
-
"f1": 1.0
|
| 119 |
-
},
|
| 120 |
-
{
|
| 121 |
-
"acc": 1.0,
|
| 122 |
-
"precision": 1.0,
|
| 123 |
-
"recall": 1.0,
|
| 124 |
-
"f1": 1.0
|
| 125 |
-
},
|
| 126 |
-
{
|
| 127 |
-
"acc": 1.0,
|
| 128 |
-
"precision": 1.0,
|
| 129 |
-
"recall": 1.0,
|
| 130 |
-
"f1": 1.0
|
| 131 |
-
},
|
| 132 |
-
{
|
| 133 |
-
"acc": 1.0,
|
| 134 |
-
"precision": 1.0,
|
| 135 |
-
"recall": 1.0,
|
| 136 |
-
"f1": 1.0
|
| 137 |
-
},
|
| 138 |
-
{
|
| 139 |
-
"acc": 1.0,
|
| 140 |
-
"precision": 1.0,
|
| 141 |
-
"recall": 1.0,
|
| 142 |
-
"f1": 1.0
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"acc": 1.0,
|
| 146 |
-
"precision": 1.0,
|
| 147 |
-
"recall": 1.0,
|
| 148 |
-
"f1": 1.0
|
| 149 |
-
},
|
| 150 |
-
{
|
| 151 |
-
"acc": 1.0,
|
| 152 |
-
"precision": 1.0,
|
| 153 |
-
"recall": 1.0,
|
| 154 |
-
"f1": 1.0
|
| 155 |
-
},
|
| 156 |
-
{
|
| 157 |
-
"acc": 1.0,
|
| 158 |
-
"precision": 1.0,
|
| 159 |
-
"recall": 1.0,
|
| 160 |
-
"f1": 1.0
|
| 161 |
-
},
|
| 162 |
-
{
|
| 163 |
-
"acc": 1.0,
|
| 164 |
-
"precision": 1.0,
|
| 165 |
-
"recall": 1.0,
|
| 166 |
-
"f1": 1.0
|
| 167 |
-
},
|
| 168 |
-
{
|
| 169 |
-
"acc": 1.0,
|
| 170 |
-
"precision": 1.0,
|
| 171 |
-
"recall": 1.0,
|
| 172 |
-
"f1": 1.0
|
| 173 |
-
},
|
| 174 |
-
{
|
| 175 |
-
"acc": 1.0,
|
| 176 |
-
"precision": 1.0,
|
| 177 |
-
"recall": 1.0,
|
| 178 |
-
"f1": 1.0
|
| 179 |
-
},
|
| 180 |
-
{
|
| 181 |
-
"acc": 1.0,
|
| 182 |
-
"precision": 1.0,
|
| 183 |
-
"recall": 1.0,
|
| 184 |
-
"f1": 1.0
|
| 185 |
-
},
|
| 186 |
-
{
|
| 187 |
-
"acc": 1.0,
|
| 188 |
-
"precision": 1.0,
|
| 189 |
-
"recall": 1.0,
|
| 190 |
-
"f1": 1.0
|
| 191 |
-
},
|
| 192 |
-
{
|
| 193 |
-
"acc": 1.0,
|
| 194 |
-
"precision": 1.0,
|
| 195 |
-
"recall": 1.0,
|
| 196 |
-
"f1": 1.0
|
| 197 |
-
},
|
| 198 |
-
{
|
| 199 |
-
"acc": 1.0,
|
| 200 |
-
"precision": 1.0,
|
| 201 |
-
"recall": 1.0,
|
| 202 |
-
"f1": 1.0
|
| 203 |
-
},
|
| 204 |
-
{
|
| 205 |
-
"acc": 1.0,
|
| 206 |
-
"precision": 1.0,
|
| 207 |
-
"recall": 1.0,
|
| 208 |
-
"f1": 1.0
|
| 209 |
-
},
|
| 210 |
-
{
|
| 211 |
-
"acc": 1.0,
|
| 212 |
-
"precision": 1.0,
|
| 213 |
-
"recall": 1.0,
|
| 214 |
-
"f1": 1.0
|
| 215 |
-
},
|
| 216 |
-
{
|
| 217 |
-
"acc": 1.0,
|
| 218 |
-
"precision": 1.0,
|
| 219 |
-
"recall": 1.0,
|
| 220 |
-
"f1": 1.0
|
| 221 |
-
},
|
| 222 |
-
{
|
| 223 |
-
"acc": 1.0,
|
| 224 |
-
"precision": 1.0,
|
| 225 |
-
"recall": 1.0,
|
| 226 |
-
"f1": 1.0
|
| 227 |
-
},
|
| 228 |
-
{
|
| 229 |
-
"acc": 1.0,
|
| 230 |
-
"precision": 1.0,
|
| 231 |
-
"recall": 1.0,
|
| 232 |
-
"f1": 1.0
|
| 233 |
-
},
|
| 234 |
-
{
|
| 235 |
-
"acc": 1.0,
|
| 236 |
-
"precision": 1.0,
|
| 237 |
-
"recall": 1.0,
|
| 238 |
-
"f1": 1.0
|
| 239 |
-
},
|
| 240 |
-
{
|
| 241 |
-
"acc": 1.0,
|
| 242 |
-
"precision": 1.0,
|
| 243 |
-
"recall": 1.0,
|
| 244 |
-
"f1": 1.0
|
| 245 |
-
},
|
| 246 |
-
{
|
| 247 |
-
"acc": 1.0,
|
| 248 |
-
"precision": 1.0,
|
| 249 |
-
"recall": 1.0,
|
| 250 |
-
"f1": 1.0
|
| 251 |
-
},
|
| 252 |
-
{
|
| 253 |
-
"acc": 1.0,
|
| 254 |
-
"precision": 1.0,
|
| 255 |
-
"recall": 1.0,
|
| 256 |
-
"f1": 1.0
|
| 257 |
-
},
|
| 258 |
-
{
|
| 259 |
-
"acc": 1.0,
|
| 260 |
-
"precision": 1.0,
|
| 261 |
-
"recall": 1.0,
|
| 262 |
-
"f1": 1.0
|
| 263 |
-
},
|
| 264 |
-
{
|
| 265 |
-
"acc": 1.0,
|
| 266 |
-
"precision": 1.0,
|
| 267 |
-
"recall": 1.0,
|
| 268 |
-
"f1": 1.0
|
| 269 |
-
},
|
| 270 |
-
{
|
| 271 |
-
"acc": 1.0,
|
| 272 |
-
"precision": 1.0,
|
| 273 |
-
"recall": 1.0,
|
| 274 |
-
"f1": 1.0
|
| 275 |
-
},
|
| 276 |
-
{
|
| 277 |
-
"acc": 1.0,
|
| 278 |
-
"precision": 1.0,
|
| 279 |
-
"recall": 1.0,
|
| 280 |
-
"f1": 1.0
|
| 281 |
-
},
|
| 282 |
-
{
|
| 283 |
-
"acc": 1.0,
|
| 284 |
-
"precision": 1.0,
|
| 285 |
-
"recall": 1.0,
|
| 286 |
-
"f1": 1.0
|
| 287 |
-
},
|
| 288 |
-
{
|
| 289 |
-
"acc": 1.0,
|
| 290 |
-
"precision": 1.0,
|
| 291 |
-
"recall": 1.0,
|
| 292 |
-
"f1": 1.0
|
| 293 |
-
},
|
| 294 |
-
{
|
| 295 |
-
"acc": 1.0,
|
| 296 |
-
"precision": 1.0,
|
| 297 |
-
"recall": 1.0,
|
| 298 |
-
"f1": 1.0
|
| 299 |
-
},
|
| 300 |
-
{
|
| 301 |
-
"acc": 1.0,
|
| 302 |
-
"precision": 1.0,
|
| 303 |
-
"recall": 1.0,
|
| 304 |
-
"f1": 1.0
|
| 305 |
-
},
|
| 306 |
-
{
|
| 307 |
-
"acc": 1.0,
|
| 308 |
-
"precision": 1.0,
|
| 309 |
-
"recall": 1.0,
|
| 310 |
-
"f1": 1.0
|
| 311 |
-
},
|
| 312 |
-
{
|
| 313 |
-
"acc": 1.0,
|
| 314 |
-
"precision": 1.0,
|
| 315 |
-
"recall": 1.0,
|
| 316 |
-
"f1": 1.0
|
| 317 |
-
},
|
| 318 |
-
{
|
| 319 |
-
"acc": 1.0,
|
| 320 |
-
"precision": 1.0,
|
| 321 |
-
"recall": 1.0,
|
| 322 |
-
"f1": 1.0
|
| 323 |
-
},
|
| 324 |
-
{
|
| 325 |
-
"acc": 1.0,
|
| 326 |
-
"precision": 1.0,
|
| 327 |
-
"recall": 1.0,
|
| 328 |
-
"f1": 1.0
|
| 329 |
-
},
|
| 330 |
-
{
|
| 331 |
-
"acc": 1.0,
|
| 332 |
-
"precision": 1.0,
|
| 333 |
-
"recall": 1.0,
|
| 334 |
-
"f1": 1.0
|
| 335 |
-
},
|
| 336 |
-
{
|
| 337 |
-
"acc": 1.0,
|
| 338 |
-
"precision": 1.0,
|
| 339 |
-
"recall": 1.0,
|
| 340 |
-
"f1": 1.0
|
| 341 |
-
},
|
| 342 |
-
{
|
| 343 |
-
"acc": 1.0,
|
| 344 |
-
"precision": 1.0,
|
| 345 |
-
"recall": 1.0,
|
| 346 |
-
"f1": 1.0
|
| 347 |
-
},
|
| 348 |
-
{
|
| 349 |
-
"acc": 1.0,
|
| 350 |
-
"precision": 1.0,
|
| 351 |
-
"recall": 1.0,
|
| 352 |
-
"f1": 1.0
|
| 353 |
-
},
|
| 354 |
-
{
|
| 355 |
-
"acc": 1.0,
|
| 356 |
-
"precision": 1.0,
|
| 357 |
-
"recall": 1.0,
|
| 358 |
-
"f1": 1.0
|
| 359 |
-
},
|
| 360 |
-
{
|
| 361 |
-
"acc": 1.0,
|
| 362 |
-
"precision": 1.0,
|
| 363 |
-
"recall": 1.0,
|
| 364 |
-
"f1": 1.0
|
| 365 |
-
},
|
| 366 |
-
{
|
| 367 |
-
"acc": 1.0,
|
| 368 |
-
"precision": 1.0,
|
| 369 |
-
"recall": 1.0,
|
| 370 |
-
"f1": 1.0
|
| 371 |
-
},
|
| 372 |
-
{
|
| 373 |
-
"acc": 1.0,
|
| 374 |
-
"precision": 1.0,
|
| 375 |
-
"recall": 1.0,
|
| 376 |
-
"f1": 1.0
|
| 377 |
-
},
|
| 378 |
-
{
|
| 379 |
-
"acc": 1.0,
|
| 380 |
-
"precision": 1.0,
|
| 381 |
-
"recall": 1.0,
|
| 382 |
-
"f1": 1.0
|
| 383 |
-
},
|
| 384 |
-
{
|
| 385 |
-
"acc": 1.0,
|
| 386 |
-
"precision": 1.0,
|
| 387 |
-
"recall": 1.0,
|
| 388 |
-
"f1": 1.0
|
| 389 |
-
},
|
| 390 |
-
{
|
| 391 |
-
"acc": 1.0,
|
| 392 |
-
"precision": 1.0,
|
| 393 |
-
"recall": 1.0,
|
| 394 |
-
"f1": 1.0
|
| 395 |
-
},
|
| 396 |
-
{
|
| 397 |
-
"acc": 1.0,
|
| 398 |
-
"precision": 1.0,
|
| 399 |
-
"recall": 1.0,
|
| 400 |
-
"f1": 1.0
|
| 401 |
-
},
|
| 402 |
-
{
|
| 403 |
-
"acc": 1.0,
|
| 404 |
-
"precision": 1.0,
|
| 405 |
-
"recall": 1.0,
|
| 406 |
-
"f1": 1.0
|
| 407 |
-
},
|
| 408 |
-
{
|
| 409 |
-
"acc": 1.0,
|
| 410 |
-
"precision": 1.0,
|
| 411 |
-
"recall": 1.0,
|
| 412 |
-
"f1": 1.0
|
| 413 |
-
},
|
| 414 |
-
{
|
| 415 |
-
"acc": 1.0,
|
| 416 |
-
"precision": 1.0,
|
| 417 |
-
"recall": 1.0,
|
| 418 |
-
"f1": 1.0
|
| 419 |
-
},
|
| 420 |
-
{
|
| 421 |
-
"acc": 1.0,
|
| 422 |
-
"precision": 1.0,
|
| 423 |
-
"recall": 1.0,
|
| 424 |
-
"f1": 1.0
|
| 425 |
-
},
|
| 426 |
-
{
|
| 427 |
-
"acc": 1.0,
|
| 428 |
-
"precision": 1.0,
|
| 429 |
-
"recall": 1.0,
|
| 430 |
-
"f1": 1.0
|
| 431 |
-
},
|
| 432 |
-
{
|
| 433 |
-
"acc": 1.0,
|
| 434 |
-
"precision": 1.0,
|
| 435 |
-
"recall": 1.0,
|
| 436 |
-
"f1": 1.0
|
| 437 |
-
},
|
| 438 |
-
{
|
| 439 |
-
"acc": 1.0,
|
| 440 |
-
"precision": 1.0,
|
| 441 |
-
"recall": 1.0,
|
| 442 |
-
"f1": 1.0
|
| 443 |
-
},
|
| 444 |
-
{
|
| 445 |
-
"acc": 1.0,
|
| 446 |
-
"precision": 1.0,
|
| 447 |
-
"recall": 1.0,
|
| 448 |
-
"f1": 1.0
|
| 449 |
-
},
|
| 450 |
-
{
|
| 451 |
-
"acc": 1.0,
|
| 452 |
-
"precision": 1.0,
|
| 453 |
-
"recall": 1.0,
|
| 454 |
-
"f1": 1.0
|
| 455 |
-
},
|
| 456 |
-
{
|
| 457 |
-
"acc": 1.0,
|
| 458 |
-
"precision": 1.0,
|
| 459 |
-
"recall": 1.0,
|
| 460 |
-
"f1": 1.0
|
| 461 |
-
},
|
| 462 |
-
{
|
| 463 |
-
"acc": 1.0,
|
| 464 |
-
"precision": 1.0,
|
| 465 |
-
"recall": 1.0,
|
| 466 |
-
"f1": 1.0
|
| 467 |
-
},
|
| 468 |
-
{
|
| 469 |
-
"acc": 1.0,
|
| 470 |
-
"precision": 1.0,
|
| 471 |
-
"recall": 1.0,
|
| 472 |
-
"f1": 1.0
|
| 473 |
-
},
|
| 474 |
-
{
|
| 475 |
-
"acc": 1.0,
|
| 476 |
-
"precision": 1.0,
|
| 477 |
-
"recall": 1.0,
|
| 478 |
-
"f1": 1.0
|
| 479 |
-
},
|
| 480 |
-
{
|
| 481 |
-
"acc": 1.0,
|
| 482 |
-
"precision": 1.0,
|
| 483 |
-
"recall": 1.0,
|
| 484 |
-
"f1": 1.0
|
| 485 |
-
},
|
| 486 |
-
{
|
| 487 |
-
"acc": 1.0,
|
| 488 |
-
"precision": 1.0,
|
| 489 |
-
"recall": 1.0,
|
| 490 |
-
"f1": 1.0
|
| 491 |
-
},
|
| 492 |
-
{
|
| 493 |
-
"acc": 1.0,
|
| 494 |
-
"precision": 1.0,
|
| 495 |
-
"recall": 1.0,
|
| 496 |
-
"f1": 1.0
|
| 497 |
-
},
|
| 498 |
-
{
|
| 499 |
-
"acc": 1.0,
|
| 500 |
-
"precision": 1.0,
|
| 501 |
-
"recall": 1.0,
|
| 502 |
-
"f1": 1.0
|
| 503 |
-
},
|
| 504 |
-
{
|
| 505 |
-
"acc": 1.0,
|
| 506 |
-
"precision": 1.0,
|
| 507 |
-
"recall": 1.0,
|
| 508 |
-
"f1": 1.0
|
| 509 |
-
},
|
| 510 |
-
{
|
| 511 |
-
"acc": 1.0,
|
| 512 |
-
"precision": 1.0,
|
| 513 |
-
"recall": 1.0,
|
| 514 |
-
"f1": 1.0
|
| 515 |
-
},
|
| 516 |
-
{
|
| 517 |
-
"acc": 1.0,
|
| 518 |
-
"precision": 1.0,
|
| 519 |
-
"recall": 1.0,
|
| 520 |
-
"f1": 1.0
|
| 521 |
-
},
|
| 522 |
-
{
|
| 523 |
-
"acc": 1.0,
|
| 524 |
-
"precision": 1.0,
|
| 525 |
-
"recall": 1.0,
|
| 526 |
-
"f1": 1.0
|
| 527 |
-
},
|
| 528 |
-
{
|
| 529 |
-
"acc": 1.0,
|
| 530 |
-
"precision": 1.0,
|
| 531 |
-
"recall": 1.0,
|
| 532 |
-
"f1": 1.0
|
| 533 |
-
},
|
| 534 |
-
{
|
| 535 |
-
"acc": 1.0,
|
| 536 |
-
"precision": 1.0,
|
| 537 |
-
"recall": 1.0,
|
| 538 |
-
"f1": 1.0
|
| 539 |
-
},
|
| 540 |
-
{
|
| 541 |
-
"acc": 1.0,
|
| 542 |
-
"precision": 1.0,
|
| 543 |
-
"recall": 1.0,
|
| 544 |
-
"f1": 1.0
|
| 545 |
-
},
|
| 546 |
-
{
|
| 547 |
-
"acc": 1.0,
|
| 548 |
-
"precision": 1.0,
|
| 549 |
-
"recall": 1.0,
|
| 550 |
-
"f1": 1.0
|
| 551 |
-
},
|
| 552 |
-
{
|
| 553 |
-
"acc": 1.0,
|
| 554 |
-
"precision": 1.0,
|
| 555 |
-
"recall": 1.0,
|
| 556 |
-
"f1": 1.0
|
| 557 |
-
},
|
| 558 |
-
{
|
| 559 |
-
"acc": 1.0,
|
| 560 |
-
"precision": 1.0,
|
| 561 |
-
"recall": 1.0,
|
| 562 |
-
"f1": 1.0
|
| 563 |
-
},
|
| 564 |
-
{
|
| 565 |
-
"acc": 1.0,
|
| 566 |
-
"precision": 1.0,
|
| 567 |
-
"recall": 1.0,
|
| 568 |
-
"f1": 1.0
|
| 569 |
-
},
|
| 570 |
-
{
|
| 571 |
-
"acc": 1.0,
|
| 572 |
-
"precision": 1.0,
|
| 573 |
-
"recall": 1.0,
|
| 574 |
-
"f1": 1.0
|
| 575 |
-
},
|
| 576 |
-
{
|
| 577 |
-
"acc": 1.0,
|
| 578 |
-
"precision": 1.0,
|
| 579 |
-
"recall": 1.0,
|
| 580 |
-
"f1": 1.0
|
| 581 |
-
}
|
| 582 |
-
]
|
| 583 |
-
},
|
| 584 |
-
"medium": {
|
| 585 |
-
"n_nodes": 25,
|
| 586 |
-
"n_edges": 27,
|
| 587 |
-
"gnn_final": {
|
| 588 |
-
"acc": 0.9914,
|
| 589 |
-
"precision": 0.982778750729714,
|
| 590 |
-
"recall": 0.9920447849145551,
|
| 591 |
-
"f1": 0.9873900293255131
|
| 592 |
-
},
|
| 593 |
-
"baseline_direct_neighbors": {
|
| 594 |
-
"acc": 0.8301,
|
| 595 |
-
"precision": 1.0,
|
| 596 |
-
"recall": 0.4994107248084856,
|
| 597 |
-
"f1": 0.6661426606405974
|
| 598 |
-
},
|
| 599 |
-
"improvement_f1_pp": 32.124736868491574,
|
| 600 |
-
"train_loss_curve": [
|
| 601 |
-
0.18512494587464606,
|
| 602 |
-
0.05774239192842651,
|
| 603 |
-
0.04035148839658183,
|
| 604 |
-
0.03685507851154424,
|
| 605 |
-
0.034016887983169666,
|
| 606 |
-
0.03193854558186021,
|
| 607 |
-
0.030314448321928544,
|
| 608 |
-
0.028890588828011224,
|
| 609 |
-
0.02627120438580584,
|
| 610 |
-
0.02676936000857496,
|
| 611 |
-
0.02735587336003725,
|
| 612 |
-
0.024704556535801756,
|
| 613 |
-
0.023389738032454397,
|
| 614 |
-
0.02484239745095036,
|
| 615 |
-
0.022598365899086623,
|
| 616 |
-
0.022097759216314333,
|
| 617 |
-
0.021880711925624425,
|
| 618 |
-
0.023672257099118552,
|
| 619 |
-
0.021815840122002862,
|
| 620 |
-
0.021538631150760885,
|
| 621 |
-
0.021590486920307173,
|
| 622 |
-
0.020993219244996,
|
| 623 |
-
0.021660113581202914,
|
| 624 |
-
0.02028199757042485,
|
| 625 |
-
0.021449406110984975,
|
| 626 |
-
0.02049649202735325,
|
| 627 |
-
0.02005596899437715,
|
| 628 |
-
0.02060316097080978,
|
| 629 |
-
0.02082035162168178,
|
| 630 |
-
0.020935066080168856,
|
| 631 |
-
0.0209964800781561,
|
| 632 |
-
0.019652295691733542,
|
| 633 |
-
0.020470858438760543,
|
| 634 |
-
0.020456047435481396,
|
| 635 |
-
0.020529603496513553,
|
| 636 |
-
0.019996260003822708,
|
| 637 |
-
0.021328506347361064,
|
| 638 |
-
0.019778630244522907,
|
| 639 |
-
0.01971426555108731,
|
| 640 |
-
0.019847191254493045,
|
| 641 |
-
0.01984119418810368,
|
| 642 |
-
0.02021396374486143,
|
| 643 |
-
0.01946370021810413,
|
| 644 |
-
0.019111871498224214,
|
| 645 |
-
0.019667785586758944,
|
| 646 |
-
0.021675049597691873,
|
| 647 |
-
0.01897557202284267,
|
| 648 |
-
0.01971483370839516,
|
| 649 |
-
0.01965866965101487,
|
| 650 |
-
0.01936112277971507,
|
| 651 |
-
0.01895255452432814,
|
| 652 |
-
0.02035098125927439,
|
| 653 |
-
0.01909720691408324,
|
| 654 |
-
0.019500281907226687,
|
| 655 |
-
0.019117790717674256,
|
| 656 |
-
0.018927754213147425,
|
| 657 |
-
0.020313845976115717,
|
| 658 |
-
0.019341792678655486,
|
| 659 |
-
0.01890229735773205,
|
| 660 |
-
0.019833170414518056,
|
| 661 |
-
0.01948640772390163,
|
| 662 |
-
0.019305320678627013,
|
| 663 |
-
0.019213381035159603,
|
| 664 |
-
0.020478221997059808,
|
| 665 |
-
0.01936127331570382,
|
| 666 |
-
0.019158014420631225,
|
| 667 |
-
0.019090143173694583,
|
| 668 |
-
0.020291763241906225,
|
| 669 |
-
0.01900654871721499,
|
| 670 |
-
0.019815083033949698,
|
| 671 |
-
0.019103285589502736,
|
| 672 |
-
0.018360809753397392,
|
| 673 |
-
0.019985065603578676,
|
| 674 |
-
0.01858524212906661,
|
| 675 |
-
0.02056734084818314,
|
| 676 |
-
0.01856864124721938,
|
| 677 |
-
0.01852369899036554,
|
| 678 |
-
0.018906581267301003,
|
| 679 |
-
0.01927234342475787,
|
| 680 |
-
0.018721831301170885
|
| 681 |
-
],
|
| 682 |
-
"test_metric_curve": [
|
| 683 |
-
{
|
| 684 |
-
"acc": 0.9816,
|
| 685 |
-
"precision": 0.9819819819819819,
|
| 686 |
-
"recall": 0.9634649381261049,
|
| 687 |
-
"f1": 0.9726353361094586
|
| 688 |
-
},
|
| 689 |
-
{
|
| 690 |
-
"acc": 0.9885,
|
| 691 |
-
"precision": 0.9742551345096905,
|
| 692 |
-
"recall": 0.9923394225103123,
|
| 693 |
-
"f1": 0.9832141293241862
|
| 694 |
-
},
|
| 695 |
-
{
|
| 696 |
-
"acc": 0.988,
|
| 697 |
-
"precision": 0.9720299884659747,
|
| 698 |
-
"recall": 0.993223335297584,
|
| 699 |
-
"f1": 0.9825123870591663
|
| 700 |
-
},
|
| 701 |
-
{
|
| 702 |
-
"acc": 0.9892,
|
| 703 |
-
"precision": 0.986094674556213,
|
| 704 |
-
"recall": 0.9820271066588097,
|
| 705 |
-
"f1": 0.9840566873339238
|
| 706 |
-
},
|
| 707 |
-
{
|
| 708 |
-
"acc": 0.9916,
|
| 709 |
-
"precision": 0.9825072886297376,
|
| 710 |
-
"recall": 0.9929286977018268,
|
| 711 |
-
"f1": 0.9876905041031652
|
| 712 |
-
},
|
| 713 |
-
{
|
| 714 |
-
"acc": 0.9913,
|
| 715 |
-
"precision": 0.9824919754887657,
|
| 716 |
-
"recall": 0.9920447849145551,
|
| 717 |
-
"f1": 0.9872452719542588
|
| 718 |
-
},
|
| 719 |
-
{
|
| 720 |
-
"acc": 0.9909,
|
| 721 |
-
"precision": 0.9847373055474024,
|
| 722 |
-
"recall": 0.9885091337654685,
|
| 723 |
-
"f1": 0.9866196147625349
|
| 724 |
-
},
|
| 725 |
-
{
|
| 726 |
-
"acc": 0.9857,
|
| 727 |
-
"precision": 0.9954282231027126,
|
| 728 |
-
"recall": 0.9622863877430761,
|
| 729 |
-
"f1": 0.9785767790262172
|
| 730 |
-
},
|
| 731 |
-
{
|
| 732 |
-
"acc": 0.9882,
|
| 733 |
-
"precision": 0.9761627906976744,
|
| 734 |
-
"recall": 0.9893930465527401,
|
| 735 |
-
"f1": 0.9827333918642083
|
| 736 |
-
},
|
| 737 |
-
{
|
| 738 |
-
"acc": 0.9912,
|
| 739 |
-
"precision": 0.9833333333333333,
|
| 740 |
-
"recall": 0.9908662345315262,
|
| 741 |
-
"f1": 0.9870854123862635
|
| 742 |
-
},
|
| 743 |
-
{
|
| 744 |
-
"acc": 0.9911,
|
| 745 |
-
"precision": 0.9864586399764498,
|
| 746 |
-
"recall": 0.9873305833824396,
|
| 747 |
-
"f1": 0.9868944190840818
|
| 748 |
-
},
|
| 749 |
-
{
|
| 750 |
-
"acc": 0.9842,
|
| 751 |
-
"precision": 0.997539975399754,
|
| 752 |
-
"recall": 0.9558043606364172,
|
| 753 |
-
"f1": 0.9762263015347576
|
| 754 |
-
},
|
| 755 |
-
{
|
| 756 |
-
"acc": 0.9872,
|
| 757 |
-
"precision": 0.9936517533252721,
|
| 758 |
-
"recall": 0.9684737772539777,
|
| 759 |
-
"f1": 0.9809012235153686
|
| 760 |
-
},
|
| 761 |
-
{
|
| 762 |
-
"acc": 0.9919,
|
| 763 |
-
"precision": 0.9825225750072822,
|
| 764 |
-
"recall": 0.9938126104890984,
|
| 765 |
-
"f1": 0.9881353449538597
|
| 766 |
-
},
|
| 767 |
-
{
|
| 768 |
-
"acc": 0.9905,
|
| 769 |
-
"precision": 0.9864346800353878,
|
| 770 |
-
"recall": 0.9855627578078963,
|
| 771 |
-
"f1": 0.9859985261606485
|
| 772 |
-
},
|
| 773 |
-
{
|
| 774 |
-
"acc": 0.9903,
|
| 775 |
-
"precision": 0.9867139061116031,
|
| 776 |
-
"recall": 0.9846788450206246,
|
| 777 |
-
"f1": 0.9856953251732783
|
| 778 |
-
},
|
| 779 |
-
{
|
| 780 |
-
"acc": 0.9912,
|
| 781 |
-
"precision": 0.9833333333333333,
|
| 782 |
-
"recall": 0.9908662345315262,
|
| 783 |
-
"f1": 0.9870854123862635
|
| 784 |
-
},
|
| 785 |
-
{
|
| 786 |
-
"acc": 0.9917,
|
| 787 |
-
"precision": 0.9827938174394867,
|
| 788 |
-
"recall": 0.9929286977018268,
|
| 789 |
-
"f1": 0.9878352630807563
|
| 790 |
-
},
|
| 791 |
-
{
|
| 792 |
-
"acc": 0.9914,
|
| 793 |
-
"precision": 0.9822157434402332,
|
| 794 |
-
"recall": 0.9926340601060696,
|
| 795 |
-
"f1": 0.9873974208675265
|
| 796 |
-
},
|
| 797 |
-
{
|
| 798 |
-
"acc": 0.9914,
|
| 799 |
-
"precision": 0.9833430742255991,
|
| 800 |
-
"recall": 0.9914555097230406,
|
| 801 |
-
"f1": 0.9873826291079812
|
| 802 |
-
},
|
| 803 |
-
{
|
| 804 |
-
"acc": 0.9908,
|
| 805 |
-
"precision": 0.986446670595168,
|
| 806 |
-
"recall": 0.986446670595168,
|
| 807 |
-
"f1": 0.986446670595168
|
| 808 |
-
},
|
| 809 |
-
{
|
| 810 |
-
"acc": 0.9908,
|
| 811 |
-
"precision": 0.986446670595168,
|
| 812 |
-
"recall": 0.986446670595168,
|
| 813 |
-
"f1": 0.986446670595168
|
| 814 |
-
},
|
| 815 |
-
{
|
| 816 |
-
"acc": 0.9909,
|
| 817 |
-
"precision": 0.9858781994704324,
|
| 818 |
-
"recall": 0.9873305833824396,
|
| 819 |
-
"f1": 0.9866038569115266
|
| 820 |
-
},
|
| 821 |
-
{
|
| 822 |
-
"acc": 0.9912,
|
| 823 |
-
"precision": 0.9833333333333333,
|
| 824 |
-
"recall": 0.9908662345315262,
|
| 825 |
-
"f1": 0.9870854123862635
|
| 826 |
-
},
|
| 827 |
-
{
|
| 828 |
-
"acc": 0.9915,
|
| 829 |
-
"precision": 0.9827837758972863,
|
| 830 |
-
"recall": 0.9923394225103123,
|
| 831 |
-
"f1": 0.9875384840932414
|
| 832 |
-
},
|
| 833 |
-
{
|
| 834 |
-
"acc": 0.9907,
|
| 835 |
-
"precision": 0.9873043991733097,
|
| 836 |
-
"recall": 0.985268120212139,
|
| 837 |
-
"f1": 0.9862852086712873
|
| 838 |
-
},
|
| 839 |
-
{
|
| 840 |
-
"acc": 0.9919,
|
| 841 |
-
"precision": 0.9825225750072822,
|
| 842 |
-
"recall": 0.9938126104890984,
|
| 843 |
-
"f1": 0.9881353449538597
|
| 844 |
-
},
|
| 845 |
-
{
|
| 846 |
-
"acc": 0.9914,
|
| 847 |
-
"precision": 0.982778750729714,
|
| 848 |
-
"recall": 0.9920447849145551,
|
| 849 |
-
"f1": 0.9873900293255131
|
| 850 |
-
},
|
| 851 |
-
{
|
| 852 |
-
"acc": 0.9916,
|
| 853 |
-
"precision": 0.9777713625866051,
|
| 854 |
-
"recall": 0.9979375368296994,
|
| 855 |
-
"f1": 0.9877515310586177
|
| 856 |
-
},
|
| 857 |
-
{
|
| 858 |
-
"acc": 0.9901,
|
| 859 |
-
"precision": 0.9869937924918711,
|
| 860 |
-
"recall": 0.983794932233353,
|
| 861 |
-
"f1": 0.9853917662682603
|
| 862 |
-
},
|
| 863 |
-
{
|
| 864 |
-
"acc": 0.9914,
|
| 865 |
-
"precision": 0.982778750729714,
|
| 866 |
-
"recall": 0.9920447849145551,
|
| 867 |
-
"f1": 0.9873900293255131
|
| 868 |
-
},
|
| 869 |
-
{
|
| 870 |
-
"acc": 0.9904,
|
| 871 |
-
"precision": 0.9872931442080378,
|
| 872 |
-
"recall": 0.9843842074248674,
|
| 873 |
-
"f1": 0.9858365299498378
|
| 874 |
-
},
|
| 875 |
-
{
|
| 876 |
-
"acc": 0.9914,
|
| 877 |
-
"precision": 0.982778750729714,
|
| 878 |
-
"recall": 0.9920447849145551,
|
| 879 |
-
"f1": 0.9873900293255131
|
| 880 |
-
},
|
| 881 |
-
{
|
| 882 |
-
"acc": 0.9887,
|
| 883 |
-
"precision": 0.993680409268733,
|
| 884 |
-
"recall": 0.9728933411903359,
|
| 885 |
-
"f1": 0.9831770135477147
|
| 886 |
-
},
|
| 887 |
-
{
|
| 888 |
-
"acc": 0.9912,
|
| 889 |
-
"precision": 0.9833333333333333,
|
| 890 |
-
"recall": 0.9908662345315262,
|
| 891 |
-
"f1": 0.9870854123862635
|
| 892 |
-
},
|
| 893 |
-
{
|
| 894 |
-
"acc": 0.9913,
|
| 895 |
-
"precision": 0.983338205203157,
|
| 896 |
-
"recall": 0.9911608721272834,
|
| 897 |
-
"f1": 0.9872340425531914
|
| 898 |
-
},
|
| 899 |
-
{
|
| 900 |
-
"acc": 0.9915,
|
| 901 |
-
"precision": 0.9827837758972863,
|
| 902 |
-
"recall": 0.9923394225103123,
|
| 903 |
-
"f1": 0.9875384840932414
|
| 904 |
-
},
|
| 905 |
-
{
|
| 906 |
-
"acc": 0.991,
|
| 907 |
-
"precision": 0.9858823529411764,
|
| 908 |
-
"recall": 0.9876252209781968,
|
| 909 |
-
"f1": 0.986753017368266
|
| 910 |
-
},
|
| 911 |
-
{
|
| 912 |
-
"acc": 0.9905,
|
| 913 |
-
"precision": 0.9870091526424565,
|
| 914 |
-
"recall": 0.9849734826163818,
|
| 915 |
-
"f1": 0.9859902669222829
|
| 916 |
-
},
|
| 917 |
-
{
|
| 918 |
-
"acc": 0.9912,
|
| 919 |
-
"precision": 0.9830508474576272,
|
| 920 |
-
"recall": 0.9911608721272834,
|
| 921 |
-
"f1": 0.9870892018779343
|
| 922 |
-
},
|
| 923 |
-
{
|
| 924 |
-
"acc": 0.9911,
|
| 925 |
-
"precision": 0.9822001750802452,
|
| 926 |
-
"recall": 0.9917501473187978,
|
| 927 |
-
"f1": 0.9869520598152763
|
| 928 |
-
},
|
| 929 |
-
{
|
| 930 |
-
"acc": 0.9901,
|
| 931 |
-
"precision": 0.9887273805992287,
|
| 932 |
-
"recall": 0.9820271066588097,
|
| 933 |
-
"f1": 0.9853658536585367
|
| 934 |
-
},
|
| 935 |
-
{
|
| 936 |
-
"acc": 0.9914,
|
| 937 |
-
"precision": 0.982778750729714,
|
| 938 |
-
"recall": 0.9920447849145551,
|
| 939 |
-
"f1": 0.9873900293255131
|
| 940 |
-
},
|
| 941 |
-
{
|
| 942 |
-
"acc": 0.9907,
|
| 943 |
-
"precision": 0.9833089311859443,
|
| 944 |
-
"recall": 0.9893930465527401,
|
| 945 |
-
"f1": 0.9863416066970185
|
| 946 |
-
},
|
| 947 |
-
{
|
| 948 |
-
"acc": 0.9914,
|
| 949 |
-
"precision": 0.982778750729714,
|
| 950 |
-
"recall": 0.9920447849145551,
|
| 951 |
-
"f1": 0.9873900293255131
|
| 952 |
-
},
|
| 953 |
-
{
|
| 954 |
-
"acc": 0.9908,
|
| 955 |
-
"precision": 0.986446670595168,
|
| 956 |
-
"recall": 0.986446670595168,
|
| 957 |
-
"f1": 0.986446670595168
|
| 958 |
-
},
|
| 959 |
-
{
|
| 960 |
-
"acc": 0.991,
|
| 961 |
-
"precision": 0.9833235810415447,
|
| 962 |
-
"recall": 0.9902769593400118,
|
| 963 |
-
"f1": 0.9867880211391661
|
| 964 |
-
},
|
| 965 |
-
{
|
| 966 |
-
"acc": 0.9912,
|
| 967 |
-
"precision": 0.9833333333333333,
|
| 968 |
-
"recall": 0.9908662345315262,
|
| 969 |
-
"f1": 0.9870854123862635
|
| 970 |
-
},
|
| 971 |
-
{
|
| 972 |
-
"acc": 0.9912,
|
| 973 |
-
"precision": 0.9824868651488616,
|
| 974 |
-
"recall": 0.9917501473187978,
|
| 975 |
-
"f1": 0.9870967741935485
|
| 976 |
-
},
|
| 977 |
-
{
|
| 978 |
-
"acc": 0.9909,
|
| 979 |
-
"precision": 0.9838851450336947,
|
| 980 |
-
"recall": 0.9893930465527401,
|
| 981 |
-
"f1": 0.9866314088438372
|
| 982 |
-
},
|
| 983 |
-
{
|
| 984 |
-
"acc": 0.9911,
|
| 985 |
-
"precision": 0.9833284586136297,
|
| 986 |
-
"recall": 0.990571596935769,
|
| 987 |
-
"f1": 0.9869367385879936
|
| 988 |
-
},
|
| 989 |
-
{
|
| 990 |
-
"acc": 0.9913,
|
| 991 |
-
"precision": 0.9836209417958467,
|
| 992 |
-
"recall": 0.9908662345315262,
|
| 993 |
-
"f1": 0.9872302950242183
|
| 994 |
-
},
|
| 995 |
-
{
|
| 996 |
-
"acc": 0.9914,
|
| 997 |
-
"precision": 0.982778750729714,
|
| 998 |
-
"recall": 0.9920447849145551,
|
| 999 |
-
"f1": 0.9873900293255131
|
| 1000 |
-
},
|
| 1001 |
-
{
|
| 1002 |
-
"acc": 0.991,
|
| 1003 |
-
"precision": 0.9858823529411764,
|
| 1004 |
-
"recall": 0.9876252209781968,
|
| 1005 |
-
"f1": 0.986753017368266
|
| 1006 |
-
},
|
| 1007 |
-
{
|
| 1008 |
-
"acc": 0.9912,
|
| 1009 |
-
"precision": 0.9830508474576272,
|
| 1010 |
-
"recall": 0.9911608721272834,
|
| 1011 |
-
"f1": 0.9870892018779343
|
| 1012 |
-
},
|
| 1013 |
-
{
|
| 1014 |
-
"acc": 0.9914,
|
| 1015 |
-
"precision": 0.982778750729714,
|
| 1016 |
-
"recall": 0.9920447849145551,
|
| 1017 |
-
"f1": 0.9873900293255131
|
| 1018 |
-
},
|
| 1019 |
-
{
|
| 1020 |
-
"acc": 0.9899,
|
| 1021 |
-
"precision": 0.9875629256736749,
|
| 1022 |
-
"recall": 0.9826163818503241,
|
| 1023 |
-
"f1": 0.9850834440998375
|
| 1024 |
-
},
|
| 1025 |
-
{
|
| 1026 |
-
"acc": 0.9908,
|
| 1027 |
-
"precision": 0.986446670595168,
|
| 1028 |
-
"recall": 0.986446670595168,
|
| 1029 |
-
"f1": 0.986446670595168
|
| 1030 |
-
},
|
| 1031 |
-
{
|
| 1032 |
-
"acc": 0.9915,
|
| 1033 |
-
"precision": 0.9819399941741916,
|
| 1034 |
-
"recall": 0.993223335297584,
|
| 1035 |
-
"f1": 0.9875494360626923
|
| 1036 |
-
},
|
| 1037 |
-
{
|
| 1038 |
-
"acc": 0.9914,
|
| 1039 |
-
"precision": 0.982778750729714,
|
| 1040 |
-
"recall": 0.9920447849145551,
|
| 1041 |
-
"f1": 0.9873900293255131
|
| 1042 |
-
},
|
| 1043 |
-
{
|
| 1044 |
-
"acc": 0.9906,
|
| 1045 |
-
"precision": 0.987012987012987,
|
| 1046 |
-
"recall": 0.985268120212139,
|
| 1047 |
-
"f1": 0.9861397817752875
|
| 1048 |
-
},
|
| 1049 |
-
{
|
| 1050 |
-
"acc": 0.9908,
|
| 1051 |
-
"precision": 0.986446670595168,
|
| 1052 |
-
"recall": 0.986446670595168,
|
| 1053 |
-
"f1": 0.986446670595168
|
| 1054 |
-
},
|
| 1055 |
-
{
|
| 1056 |
-
"acc": 0.991,
|
| 1057 |
-
"precision": 0.9833235810415447,
|
| 1058 |
-
"recall": 0.9902769593400118,
|
| 1059 |
-
"f1": 0.9867880211391661
|
| 1060 |
-
},
|
| 1061 |
-
{
|
| 1062 |
-
"acc": 0.9907,
|
| 1063 |
-
"precision": 0.9864426760978485,
|
| 1064 |
-
"recall": 0.9861520329994107,
|
| 1065 |
-
"f1": 0.9862973331368794
|
| 1066 |
-
},
|
| 1067 |
-
{
|
| 1068 |
-
"acc": 0.9912,
|
| 1069 |
-
"precision": 0.9824868651488616,
|
| 1070 |
-
"recall": 0.9917501473187978,
|
| 1071 |
-
"f1": 0.9870967741935485
|
| 1072 |
-
},
|
| 1073 |
-
{
|
| 1074 |
-
"acc": 0.9911,
|
| 1075 |
-
"precision": 0.9833284586136297,
|
| 1076 |
-
"recall": 0.990571596935769,
|
| 1077 |
-
"f1": 0.9869367385879936
|
| 1078 |
-
},
|
| 1079 |
-
{
|
| 1080 |
-
"acc": 0.9908,
|
| 1081 |
-
"precision": 0.986446670595168,
|
| 1082 |
-
"recall": 0.986446670595168,
|
| 1083 |
-
"f1": 0.986446670595168
|
| 1084 |
-
},
|
| 1085 |
-
{
|
| 1086 |
-
"acc": 0.9914,
|
| 1087 |
-
"precision": 0.982778750729714,
|
| 1088 |
-
"recall": 0.9920447849145551,
|
| 1089 |
-
"f1": 0.9873900293255131
|
| 1090 |
-
},
|
| 1091 |
-
{
|
| 1092 |
-
"acc": 0.9914,
|
| 1093 |
-
"precision": 0.982778750729714,
|
| 1094 |
-
"recall": 0.9920447849145551,
|
| 1095 |
-
"f1": 0.9873900293255131
|
| 1096 |
-
},
|
| 1097 |
-
{
|
| 1098 |
-
"acc": 0.9916,
|
| 1099 |
-
"precision": 0.9825072886297376,
|
| 1100 |
-
"recall": 0.9929286977018268,
|
| 1101 |
-
"f1": 0.9876905041031652
|
| 1102 |
-
},
|
| 1103 |
-
{
|
| 1104 |
-
"acc": 0.9914,
|
| 1105 |
-
"precision": 0.982778750729714,
|
| 1106 |
-
"recall": 0.9920447849145551,
|
| 1107 |
-
"f1": 0.9873900293255131
|
| 1108 |
-
},
|
| 1109 |
-
{
|
| 1110 |
-
"acc": 0.9914,
|
| 1111 |
-
"precision": 0.982778750729714,
|
| 1112 |
-
"recall": 0.9920447849145551,
|
| 1113 |
-
"f1": 0.9873900293255131
|
| 1114 |
-
},
|
| 1115 |
-
{
|
| 1116 |
-
"acc": 0.9913,
|
| 1117 |
-
"precision": 0.9824919754887657,
|
| 1118 |
-
"recall": 0.9920447849145551,
|
| 1119 |
-
"f1": 0.9872452719542588
|
| 1120 |
-
},
|
| 1121 |
-
{
|
| 1122 |
-
"acc": 0.9915,
|
| 1123 |
-
"precision": 0.9827837758972863,
|
| 1124 |
-
"recall": 0.9923394225103123,
|
| 1125 |
-
"f1": 0.9875384840932414
|
| 1126 |
-
},
|
| 1127 |
-
{
|
| 1128 |
-
"acc": 0.9916,
|
| 1129 |
-
"precision": 0.9827887981330222,
|
| 1130 |
-
"recall": 0.9926340601060696,
|
| 1131 |
-
"f1": 0.9876868953386104
|
| 1132 |
-
},
|
| 1133 |
-
{
|
| 1134 |
-
"acc": 0.9912,
|
| 1135 |
-
"precision": 0.982768691588785,
|
| 1136 |
-
"recall": 0.9914555097230406,
|
| 1137 |
-
"f1": 0.9870929891463771
|
| 1138 |
-
},
|
| 1139 |
-
{
|
| 1140 |
-
"acc": 0.9909,
|
| 1141 |
-
"precision": 0.9833187006145742,
|
| 1142 |
-
"recall": 0.9899823217442546,
|
| 1143 |
-
"f1": 0.986639260020555
|
| 1144 |
-
},
|
| 1145 |
-
{
|
| 1146 |
-
"acc": 0.9904,
|
| 1147 |
-
"precision": 0.987005316007088,
|
| 1148 |
-
"recall": 0.9846788450206246,
|
| 1149 |
-
"f1": 0.9858407079646017
|
| 1150 |
-
},
|
| 1151 |
-
{
|
| 1152 |
-
"acc": 0.9912,
|
| 1153 |
-
"precision": 0.982768691588785,
|
| 1154 |
-
"recall": 0.9914555097230406,
|
| 1155 |
-
"f1": 0.9870929891463771
|
| 1156 |
-
},
|
| 1157 |
-
{
|
| 1158 |
-
"acc": 0.9914,
|
| 1159 |
-
"precision": 0.982778750729714,
|
| 1160 |
-
"recall": 0.9920447849145551,
|
| 1161 |
-
"f1": 0.9873900293255131
|
| 1162 |
-
}
|
| 1163 |
-
]
|
| 1164 |
-
},
|
| 1165 |
-
"hard": {
|
| 1166 |
-
"n_nodes": 40,
|
| 1167 |
-
"n_edges": 44,
|
| 1168 |
-
"gnn_final": {
|
| 1169 |
-
"acc": 0.984,
|
| 1170 |
-
"precision": 0.9533980582524272,
|
| 1171 |
-
"recall": 0.9750354609929078,
|
| 1172 |
-
"f1": 0.9640953716690043
|
| 1173 |
-
},
|
| 1174 |
-
"baseline_direct_neighbors": {
|
| 1175 |
-
"acc": 0.88875,
|
| 1176 |
-
"precision": 1.0,
|
| 1177 |
-
"recall": 0.4950354609929078,
|
| 1178 |
-
"f1": 0.6622390891840607
|
| 1179 |
-
},
|
| 1180 |
-
"improvement_f1_pp": 30.185628248494357,
|
| 1181 |
-
"train_loss_curve": [
|
| 1182 |
-
0.15102637716173195,
|
| 1183 |
-
0.052633647776499856,
|
| 1184 |
-
0.04379157433440559,
|
| 1185 |
-
0.04003102573152864,
|
| 1186 |
-
0.03876525610721728,
|
| 1187 |
-
0.0369047760956164,
|
| 1188 |
-
0.036530632421345216,
|
| 1189 |
-
0.035830124779022296,
|
| 1190 |
-
0.0349417570647056,
|
| 1191 |
-
0.035263367522318734,
|
| 1192 |
-
0.03485661885762238,
|
| 1193 |
-
0.03493121563128079,
|
| 1194 |
-
0.032977926293009656,
|
| 1195 |
-
0.03394761107103841,
|
| 1196 |
-
0.033683306101149356,
|
| 1197 |
-
0.033089775294763965,
|
| 1198 |
-
0.0335856751325955,
|
| 1199 |
-
0.03272933466515315,
|
| 1200 |
-
0.032765767610715556,
|
| 1201 |
-
0.032717534617419004,
|
| 1202 |
-
0.03298612758413583,
|
| 1203 |
-
0.03169301031356008,
|
| 1204 |
-
0.0323142114428847,
|
| 1205 |
-
0.03186470089994691,
|
| 1206 |
-
0.032041587697027356,
|
| 1207 |
-
0.03211515340814367,
|
| 1208 |
-
0.032251973500227904,
|
| 1209 |
-
0.031999882343730864,
|
| 1210 |
-
0.03164813786187369,
|
| 1211 |
-
0.03160676156320551,
|
| 1212 |
-
0.031426732700598224,
|
| 1213 |
-
0.031241096474510413,
|
| 1214 |
-
0.03162557367896079,
|
| 1215 |
-
0.03154335625256863,
|
| 1216 |
-
0.03165931336190261,
|
| 1217 |
-
0.03097459732750576,
|
| 1218 |
-
0.03131493923773814,
|
| 1219 |
-
0.0311658642354123,
|
| 1220 |
-
0.030633534374135706,
|
| 1221 |
-
0.031252258909702506,
|
| 1222 |
-
0.030825211223787848,
|
| 1223 |
-
0.03053342323340803,
|
| 1224 |
-
0.030733022628217442,
|
| 1225 |
-
0.030747544990059397,
|
| 1226 |
-
0.030629911747484584,
|
| 1227 |
-
0.030457735169680745,
|
| 1228 |
-
0.03058615475141687,
|
| 1229 |
-
0.030597560634826552,
|
| 1230 |
-
0.030619746312839653,
|
| 1231 |
-
0.03066707000986935,
|
| 1232 |
-
0.03048766604950197,
|
| 1233 |
-
0.030287153372872126,
|
| 1234 |
-
0.0303783905812179,
|
| 1235 |
-
0.030595246432494606,
|
| 1236 |
-
0.03037994001944753,
|
| 1237 |
-
0.030246819483697437,
|
| 1238 |
-
0.03012882444020579,
|
| 1239 |
-
0.03024448805347947,
|
| 1240 |
-
0.030449683469725642,
|
| 1241 |
-
0.03048290506813919,
|
| 1242 |
-
0.030136575797458136,
|
| 1243 |
-
0.02994714516170643,
|
| 1244 |
-
0.030466000927322056,
|
| 1245 |
-
0.03019473605195526,
|
| 1246 |
-
0.02987939404982535,
|
| 1247 |
-
0.030137449657182513,
|
| 1248 |
-
0.030104370625325828,
|
| 1249 |
-
0.030588962311178875,
|
| 1250 |
-
0.029767145353838714,
|
| 1251 |
-
0.030284092916966984,
|
| 1252 |
-
0.03002391016312413,
|
| 1253 |
-
0.02992785992539757,
|
| 1254 |
-
0.030997538813613574,
|
| 1255 |
-
0.029848512160238896,
|
| 1256 |
-
0.030022954882957493,
|
| 1257 |
-
0.030052907403214705,
|
| 1258 |
-
0.02975074222330568,
|
| 1259 |
-
0.029870129619877842,
|
| 1260 |
-
0.02968558935528563,
|
| 1261 |
-
0.029977637300933564
|
| 1262 |
-
],
|
| 1263 |
-
"test_metric_curve": [
|
| 1264 |
-
{
|
| 1265 |
-
"acc": 0.978625,
|
| 1266 |
-
"precision": 0.9395194697597349,
|
| 1267 |
-
"recall": 0.9651063829787234,
|
| 1268 |
-
"f1": 0.9521410579345089
|
| 1269 |
-
},
|
| 1270 |
-
{
|
| 1271 |
-
"acc": 0.9813125,
|
| 1272 |
-
"precision": 0.9460730088495575,
|
| 1273 |
-
"recall": 0.9704964539007093,
|
| 1274 |
-
"f1": 0.9581291135695281
|
| 1275 |
-
},
|
| 1276 |
-
{
|
| 1277 |
-
"acc": 0.982,
|
| 1278 |
-
"precision": 0.9607173356105893,
|
| 1279 |
-
"recall": 0.9574468085106383,
|
| 1280 |
-
"f1": 0.959079283887468
|
| 1281 |
-
},
|
| 1282 |
-
{
|
| 1283 |
-
"acc": 0.9805625,
|
| 1284 |
-
"precision": 0.9649884259259259,
|
| 1285 |
-
"recall": 0.9460992907801419,
|
| 1286 |
-
"f1": 0.9554505085231342
|
| 1287 |
-
},
|
| 1288 |
-
{
|
| 1289 |
-
"acc": 0.98225,
|
| 1290 |
-
"precision": 0.952274630198158,
|
| 1291 |
-
"recall": 0.9679432624113475,
|
| 1292 |
-
"f1": 0.9600450196961171
|
| 1293 |
-
},
|
| 1294 |
-
{
|
| 1295 |
-
"acc": 0.98225,
|
| 1296 |
-
"precision": 0.9639278557114228,
|
| 1297 |
-
"recall": 0.955177304964539,
|
| 1298 |
-
"f1": 0.9595326303790253
|
| 1299 |
-
},
|
| 1300 |
-
{
|
| 1301 |
-
"acc": 0.982375,
|
| 1302 |
-
"precision": 0.9543289436817035,
|
| 1303 |
-
"recall": 0.9662411347517731,
|
| 1304 |
-
"f1": 0.9602480969833662
|
| 1305 |
-
},
|
| 1306 |
-
{
|
| 1307 |
-
"acc": 0.98375,
|
| 1308 |
-
"precision": 0.9543556916225995,
|
| 1309 |
-
"recall": 0.9727659574468085,
|
| 1310 |
-
"f1": 0.9634728856420341
|
| 1311 |
-
},
|
| 1312 |
-
{
|
| 1313 |
-
"acc": 0.98125,
|
| 1314 |
-
"precision": 0.9680696661828737,
|
| 1315 |
-
"recall": 0.9460992907801419,
|
| 1316 |
-
"f1": 0.9569583931133429
|
| 1317 |
-
},
|
| 1318 |
-
{
|
| 1319 |
-
"acc": 0.983,
|
| 1320 |
-
"precision": 0.965379113018598,
|
| 1321 |
-
"recall": 0.9571631205673758,
|
| 1322 |
-
"f1": 0.9612535612535612
|
| 1323 |
-
},
|
| 1324 |
-
{
|
| 1325 |
-
"acc": 0.984375,
|
| 1326 |
-
"precision": 0.9593267882187938,
|
| 1327 |
-
"recall": 0.9702127659574468,
|
| 1328 |
-
"f1": 0.9647390691114245
|
| 1329 |
-
},
|
| 1330 |
-
{
|
| 1331 |
-
"acc": 0.9836875,
|
| 1332 |
-
"precision": 0.9633730834752982,
|
| 1333 |
-
"recall": 0.9625531914893617,
|
| 1334 |
-
"f1": 0.9629629629629629
|
| 1335 |
-
},
|
| 1336 |
-
{
|
| 1337 |
-
"acc": 0.98425,
|
| 1338 |
-
"precision": 0.9507022858716607,
|
| 1339 |
-
"recall": 0.979290780141844,
|
| 1340 |
-
"f1": 0.9647847959754053
|
| 1341 |
-
},
|
| 1342 |
-
{
|
| 1343 |
-
"acc": 0.983,
|
| 1344 |
-
"precision": 0.9651129539605376,
|
| 1345 |
-
"recall": 0.9574468085106383,
|
| 1346 |
-
"f1": 0.9612645969809172
|
| 1347 |
-
},
|
| 1348 |
-
{
|
| 1349 |
-
"acc": 0.9840625,
|
| 1350 |
-
"precision": 0.9587542087542088,
|
| 1351 |
-
"recall": 0.9693617021276596,
|
| 1352 |
-
"f1": 0.9640287769784174
|
| 1353 |
-
},
|
| 1354 |
-
{
|
| 1355 |
-
"acc": 0.9835625,
|
| 1356 |
-
"precision": 0.966,
|
| 1357 |
-
"recall": 0.9591489361702128,
|
| 1358 |
-
"f1": 0.9625622775800712
|
| 1359 |
-
},
|
| 1360 |
-
{
|
| 1361 |
-
"acc": 0.9839375,
|
| 1362 |
-
"precision": 0.9600225225225225,
|
| 1363 |
-
"recall": 0.9673758865248226,
|
| 1364 |
-
"f1": 0.963685177335029
|
| 1365 |
-
},
|
| 1366 |
-
{
|
| 1367 |
-
"acc": 0.98425,
|
| 1368 |
-
"precision": 0.9405114401076716,
|
| 1369 |
-
"recall": 0.9912056737588653,
|
| 1370 |
-
"f1": 0.9651933701657459
|
| 1371 |
-
},
|
| 1372 |
-
{
|
| 1373 |
-
"acc": 0.9814375,
|
| 1374 |
-
"precision": 0.9686411149825784,
|
| 1375 |
-
"recall": 0.9463829787234043,
|
| 1376 |
-
"f1": 0.9573826947912182
|
| 1377 |
-
},
|
| 1378 |
-
{
|
| 1379 |
-
"acc": 0.9831875,
|
| 1380 |
-
"precision": 0.955512031337437,
|
| 1381 |
-
"recall": 0.9687943262411347,
|
| 1382 |
-
"f1": 0.9621073390618397
|
| 1383 |
-
},
|
| 1384 |
-
{
|
| 1385 |
-
"acc": 0.9836875,
|
| 1386 |
-
"precision": 0.9515771997786386,
|
| 1387 |
-
"recall": 0.9756028368794326,
|
| 1388 |
-
"f1": 0.9634402577391792
|
| 1389 |
-
},
|
| 1390 |
-
{
|
| 1391 |
-
"acc": 0.9860625,
|
| 1392 |
-
"precision": 0.9565818584070797,
|
| 1393 |
-
"recall": 0.9812765957446808,
|
| 1394 |
-
"f1": 0.9687718806889791
|
| 1395 |
-
},
|
| 1396 |
-
{
|
| 1397 |
-
"acc": 0.9835625,
|
| 1398 |
-
"precision": 0.9505524861878453,
|
| 1399 |
-
"recall": 0.9761702127659575,
|
| 1400 |
-
"f1": 0.9631910426871939
|
| 1401 |
-
},
|
| 1402 |
-
{
|
| 1403 |
-
"acc": 0.9853125,
|
| 1404 |
-
"precision": 0.9472539423599783,
|
| 1405 |
-
"recall": 0.9883687943262411,
|
| 1406 |
-
"f1": 0.9673747049840344
|
| 1407 |
-
},
|
| 1408 |
-
{
|
| 1409 |
-
"acc": 0.9860625,
|
| 1410 |
-
"precision": 0.9479110146500271,
|
| 1411 |
-
"recall": 0.9912056737588653,
|
| 1412 |
-
"f1": 0.9690750242684788
|
| 1413 |
-
},
|
| 1414 |
-
{
|
| 1415 |
-
"acc": 0.982875,
|
| 1416 |
-
"precision": 0.9645613032294942,
|
| 1417 |
-
"recall": 0.9574468085106383,
|
| 1418 |
-
"f1": 0.960990888382688
|
| 1419 |
-
},
|
| 1420 |
-
{
|
| 1421 |
-
"acc": 0.9843125,
|
| 1422 |
-
"precision": 0.9606077658975802,
|
| 1423 |
-
"recall": 0.9685106382978723,
|
| 1424 |
-
"f1": 0.9645430145500776
|
| 1425 |
-
},
|
| 1426 |
-
{
|
| 1427 |
-
"acc": 0.9840625,
|
| 1428 |
-
"precision": 0.9501651982378855,
|
| 1429 |
-
"recall": 0.9790070921985815,
|
| 1430 |
-
"f1": 0.9643705463182898
|
| 1431 |
-
},
|
| 1432 |
-
{
|
| 1433 |
-
"acc": 0.983375,
|
| 1434 |
-
"precision": 0.9568264648163723,
|
| 1435 |
-
"recall": 0.9682269503546099,
|
| 1436 |
-
"f1": 0.9624929498025946
|
| 1437 |
-
},
|
| 1438 |
-
{
|
| 1439 |
-
"acc": 0.98375,
|
| 1440 |
-
"precision": 0.9505934308584046,
|
| 1441 |
-
"recall": 0.9770212765957447,
|
| 1442 |
-
"f1": 0.9636261891438165
|
| 1443 |
-
},
|
| 1444 |
-
{
|
| 1445 |
-
"acc": 0.9845,
|
| 1446 |
-
"precision": 0.9555184876285794,
|
| 1447 |
-
"recall": 0.9750354609929078,
|
| 1448 |
-
"f1": 0.9651783206964335
|
| 1449 |
-
},
|
| 1450 |
-
{
|
| 1451 |
-
"acc": 0.9830625,
|
| 1452 |
-
"precision": 0.9557422969187676,
|
| 1453 |
-
"recall": 0.9679432624113475,
|
| 1454 |
-
"f1": 0.9618040873854828
|
| 1455 |
-
},
|
| 1456 |
-
{
|
| 1457 |
-
"acc": 0.983375,
|
| 1458 |
-
"precision": 0.9555493430248811,
|
| 1459 |
-
"recall": 0.969645390070922,
|
| 1460 |
-
"f1": 0.9625457617572516
|
| 1461 |
-
},
|
| 1462 |
-
{
|
| 1463 |
-
"acc": 0.984,
|
| 1464 |
-
"precision": 0.9511454595638973,
|
| 1465 |
-
"recall": 0.9775886524822694,
|
| 1466 |
-
"f1": 0.9641857862339116
|
| 1467 |
-
},
|
| 1468 |
-
{
|
| 1469 |
-
"acc": 0.9845625,
|
| 1470 |
-
"precision": 0.9611705120990434,
|
| 1471 |
-
"recall": 0.9690780141843972,
|
| 1472 |
-
"f1": 0.9651080661110327
|
| 1473 |
-
},
|
| 1474 |
-
{
|
| 1475 |
-
"acc": 0.984625,
|
| 1476 |
-
"precision": 0.9565580618212197,
|
| 1477 |
-
"recall": 0.9744680851063829,
|
| 1478 |
-
"f1": 0.9654300168634065
|
| 1479 |
-
},
|
| 1480 |
-
{
|
| 1481 |
-
"acc": 0.9846875,
|
| 1482 |
-
"precision": 0.9563160823594881,
|
| 1483 |
-
"recall": 0.9750354609929078,
|
| 1484 |
-
"f1": 0.9655850540806294
|
| 1485 |
-
},
|
| 1486 |
-
{
|
| 1487 |
-
"acc": 0.9856875,
|
| 1488 |
-
"precision": 0.9461288576069301,
|
| 1489 |
-
"recall": 0.9914893617021276,
|
| 1490 |
-
"f1": 0.9682781548690954
|
| 1491 |
-
},
|
| 1492 |
-
{
|
| 1493 |
-
"acc": 0.9841875,
|
| 1494 |
-
"precision": 0.9631936579841449,
|
| 1495 |
-
"recall": 0.9651063829787234,
|
| 1496 |
-
"f1": 0.9641490718435596
|
| 1497 |
-
},
|
| 1498 |
-
{
|
| 1499 |
-
"acc": 0.98475,
|
| 1500 |
-
"precision": 0.9560745065332221,
|
| 1501 |
-
"recall": 0.9756028368794326,
|
| 1502 |
-
"f1": 0.9657399606852007
|
| 1503 |
-
},
|
| 1504 |
-
{
|
| 1505 |
-
"acc": 0.9836875,
|
| 1506 |
-
"precision": 0.9558659217877095,
|
| 1507 |
-
"recall": 0.9707801418439717,
|
| 1508 |
-
"f1": 0.963265306122449
|
| 1509 |
-
},
|
| 1510 |
-
{
|
| 1511 |
-
"acc": 0.9854375,
|
| 1512 |
-
"precision": 0.9497267759562842,
|
| 1513 |
-
"recall": 0.9860992907801418,
|
| 1514 |
-
"f1": 0.967571329157968
|
| 1515 |
-
},
|
| 1516 |
-
{
|
| 1517 |
-
"acc": 0.9844375,
|
| 1518 |
-
"precision": 0.9502473886750962,
|
| 1519 |
-
"recall": 0.9807092198581561,
|
| 1520 |
-
"f1": 0.9652380287588997
|
| 1521 |
-
},
|
| 1522 |
-
{
|
| 1523 |
-
"acc": 0.9844375,
|
| 1524 |
-
"precision": 0.9601123595505618,
|
| 1525 |
-
"recall": 0.969645390070922,
|
| 1526 |
-
"f1": 0.9648553281580804
|
| 1527 |
-
},
|
| 1528 |
-
{
|
| 1529 |
-
"acc": 0.98475,
|
| 1530 |
-
"precision": 0.957345971563981,
|
| 1531 |
-
"recall": 0.9741843971631206,
|
| 1532 |
-
"f1": 0.9656917885264341
|
| 1533 |
-
},
|
| 1534 |
-
{
|
| 1535 |
-
"acc": 0.983625,
|
| 1536 |
-
"precision": 0.9543302701197438,
|
| 1537 |
-
"recall": 0.9721985815602837,
|
| 1538 |
-
"f1": 0.9631815626756605
|
| 1539 |
-
},
|
| 1540 |
-
{
|
| 1541 |
-
"acc": 0.9839375,
|
| 1542 |
-
"precision": 0.9526315789473684,
|
| 1543 |
-
"recall": 0.9756028368794326,
|
| 1544 |
-
"f1": 0.9639803784162578
|
| 1545 |
-
},
|
| 1546 |
-
{
|
| 1547 |
-
"acc": 0.9833125,
|
| 1548 |
-
"precision": 0.9509966777408638,
|
| 1549 |
-
"recall": 0.9744680851063829,
|
| 1550 |
-
"f1": 0.962589323245061
|
| 1551 |
-
},
|
| 1552 |
-
{
|
| 1553 |
-
"acc": 0.98425,
|
| 1554 |
-
"precision": 0.9499587572174869,
|
| 1555 |
-
"recall": 0.9801418439716312,
|
| 1556 |
-
"f1": 0.9648142976822116
|
| 1557 |
-
},
|
| 1558 |
-
{
|
| 1559 |
-
"acc": 0.984375,
|
| 1560 |
-
"precision": 0.9590692458648724,
|
| 1561 |
-
"recall": 0.9704964539007093,
|
| 1562 |
-
"f1": 0.9647490129723633
|
| 1563 |
-
},
|
| 1564 |
-
{
|
| 1565 |
-
"acc": 0.9838125,
|
| 1566 |
-
"precision": 0.9528563505268997,
|
| 1567 |
-
"recall": 0.9747517730496454,
|
| 1568 |
-
"f1": 0.9636797083158043
|
| 1569 |
-
},
|
| 1570 |
-
{
|
| 1571 |
-
"acc": 0.9848125,
|
| 1572 |
-
"precision": 0.9553274139844617,
|
| 1573 |
-
"recall": 0.9767375886524823,
|
| 1574 |
-
"f1": 0.965913872913452
|
| 1575 |
-
},
|
| 1576 |
-
{
|
| 1577 |
-
"acc": 0.9836875,
|
| 1578 |
-
"precision": 0.9551031790295594,
|
| 1579 |
-
"recall": 0.9716312056737588,
|
| 1580 |
-
"f1": 0.963296301504711
|
| 1581 |
-
},
|
| 1582 |
-
{
|
| 1583 |
-
"acc": 0.9845,
|
| 1584 |
-
"precision": 0.9429575560962422,
|
| 1585 |
-
"recall": 0.9895035460992908,
|
| 1586 |
-
"f1": 0.965669988925803
|
| 1587 |
-
},
|
| 1588 |
-
{
|
| 1589 |
-
"acc": 0.982375,
|
| 1590 |
-
"precision": 0.9589583923011605,
|
| 1591 |
-
"recall": 0.9611347517730496,
|
| 1592 |
-
"f1": 0.9600453386228394
|
| 1593 |
-
},
|
| 1594 |
-
{
|
| 1595 |
-
"acc": 0.984375,
|
| 1596 |
-
"precision": 0.962439988703756,
|
| 1597 |
-
"recall": 0.9668085106382979,
|
| 1598 |
-
"f1": 0.9646193037078971
|
| 1599 |
-
},
|
| 1600 |
-
{
|
| 1601 |
-
"acc": 0.985625,
|
| 1602 |
-
"precision": 0.9517411571154374,
|
| 1603 |
-
"recall": 0.9846808510638297,
|
| 1604 |
-
"f1": 0.967930842163971
|
| 1605 |
-
},
|
| 1606 |
-
{
|
| 1607 |
-
"acc": 0.98325,
|
| 1608 |
-
"precision": 0.9596387242449901,
|
| 1609 |
-
"recall": 0.9645390070921985,
|
| 1610 |
-
"f1": 0.9620826259196378
|
| 1611 |
-
},
|
| 1612 |
-
{
|
| 1613 |
-
"acc": 0.984,
|
| 1614 |
-
"precision": 0.9647426784191072,
|
| 1615 |
-
"recall": 0.9625531914893617,
|
| 1616 |
-
"f1": 0.9636466912808862
|
| 1617 |
-
},
|
| 1618 |
-
{
|
| 1619 |
-
"acc": 0.984875,
|
| 1620 |
-
"precision": 0.9586476669460743,
|
| 1621 |
-
"recall": 0.9733333333333334,
|
| 1622 |
-
"f1": 0.9659346846846848
|
| 1623 |
-
},
|
| 1624 |
-
{
|
| 1625 |
-
"acc": 0.9850625,
|
| 1626 |
-
"precision": 0.9581706636921361,
|
| 1627 |
-
"recall": 0.9747517730496454,
|
| 1628 |
-
"f1": 0.9663900998453102
|
| 1629 |
-
},
|
| 1630 |
-
{
|
| 1631 |
-
"acc": 0.9836875,
|
| 1632 |
-
"precision": 0.9493392070484582,
|
| 1633 |
-
"recall": 0.9781560283687943,
|
| 1634 |
-
"f1": 0.9635322062316614
|
| 1635 |
-
},
|
| 1636 |
-
{
|
| 1637 |
-
"acc": 0.983125,
|
| 1638 |
-
"precision": 0.9575484959235311,
|
| 1639 |
-
"recall": 0.9662411347517731,
|
| 1640 |
-
"f1": 0.9618751765038125
|
| 1641 |
-
},
|
| 1642 |
-
{
|
| 1643 |
-
"acc": 0.98425,
|
| 1644 |
-
"precision": 0.9492176777381279,
|
| 1645 |
-
"recall": 0.9809929078014185,
|
| 1646 |
-
"f1": 0.9648437500000001
|
| 1647 |
-
},
|
| 1648 |
-
{
|
| 1649 |
-
"acc": 0.9826875,
|
| 1650 |
-
"precision": 0.9672036823935558,
|
| 1651 |
-
"recall": 0.953758865248227,
|
| 1652 |
-
"f1": 0.960434223682331
|
| 1653 |
-
},
|
| 1654 |
-
{
|
| 1655 |
-
"acc": 0.9845,
|
| 1656 |
-
"precision": 0.961679346294731,
|
| 1657 |
-
"recall": 0.9682269503546099,
|
| 1658 |
-
"f1": 0.964942041277919
|
| 1659 |
-
},
|
| 1660 |
-
{
|
| 1661 |
-
"acc": 0.9845,
|
| 1662 |
-
"precision": 0.960900140646976,
|
| 1663 |
-
"recall": 0.9690780141843972,
|
| 1664 |
-
"f1": 0.9649717514124294
|
| 1665 |
-
},
|
| 1666 |
-
{
|
| 1667 |
-
"acc": 0.984125,
|
| 1668 |
-
"precision": 0.9623975120158327,
|
| 1669 |
-
"recall": 0.9656737588652482,
|
| 1670 |
-
"f1": 0.9640328518833192
|
| 1671 |
-
},
|
| 1672 |
-
{
|
| 1673 |
-
"acc": 0.984875,
|
| 1674 |
-
"precision": 0.9571150097465887,
|
| 1675 |
-
"recall": 0.9750354609929078,
|
| 1676 |
-
"f1": 0.9659921304103429
|
| 1677 |
-
},
|
| 1678 |
-
{
|
| 1679 |
-
"acc": 0.984625,
|
| 1680 |
-
"precision": 0.9598877980364656,
|
| 1681 |
-
"recall": 0.9707801418439717,
|
| 1682 |
-
"f1": 0.9653032440056418
|
| 1683 |
-
},
|
| 1684 |
-
{
|
| 1685 |
-
"acc": 0.98375,
|
| 1686 |
-
"precision": 0.9546087440824282,
|
| 1687 |
-
"recall": 0.9724822695035461,
|
| 1688 |
-
"f1": 0.9634626194491286
|
| 1689 |
-
},
|
| 1690 |
-
{
|
| 1691 |
-
"acc": 0.984125,
|
| 1692 |
-
"precision": 0.9501789154968345,
|
| 1693 |
-
"recall": 0.979290780141844,
|
| 1694 |
-
"f1": 0.9645152277172394
|
| 1695 |
-
},
|
| 1696 |
-
{
|
| 1697 |
-
"acc": 0.9849375,
|
| 1698 |
-
"precision": 0.9607182940516273,
|
| 1699 |
-
"recall": 0.9713475177304964,
|
| 1700 |
-
"f1": 0.9660036676541119
|
| 1701 |
-
},
|
| 1702 |
-
{
|
| 1703 |
-
"acc": 0.984875,
|
| 1704 |
-
"precision": 0.956606397774687,
|
| 1705 |
-
"recall": 0.9756028368794326,
|
| 1706 |
-
"f1": 0.9660112359550562
|
| 1707 |
-
},
|
| 1708 |
-
{
|
| 1709 |
-
"acc": 0.984625,
|
| 1710 |
-
"precision": 0.9570671870643992,
|
| 1711 |
-
"recall": 0.9739007092198582,
|
| 1712 |
-
"f1": 0.9654105736782902
|
| 1713 |
-
},
|
| 1714 |
-
{
|
| 1715 |
-
"acc": 0.9849375,
|
| 1716 |
-
"precision": 0.9584031267448353,
|
| 1717 |
-
"recall": 0.9739007092198582,
|
| 1718 |
-
"f1": 0.9660897706486562
|
| 1719 |
-
},
|
| 1720 |
-
{
|
| 1721 |
-
"acc": 0.98375,
|
| 1722 |
-
"precision": 0.9523413688002217,
|
| 1723 |
-
"recall": 0.9750354609929078,
|
| 1724 |
-
"f1": 0.9635548079618728
|
| 1725 |
-
},
|
| 1726 |
-
{
|
| 1727 |
-
"acc": 0.984,
|
| 1728 |
-
"precision": 0.9536497363308354,
|
| 1729 |
-
"recall": 0.9747517730496454,
|
| 1730 |
-
"f1": 0.9640852974186307
|
| 1731 |
-
},
|
| 1732 |
-
{
|
| 1733 |
-
"acc": 0.98375,
|
| 1734 |
-
"precision": 0.9505934308584046,
|
| 1735 |
-
"recall": 0.9770212765957447,
|
| 1736 |
-
"f1": 0.9636261891438165
|
| 1737 |
-
},
|
| 1738 |
-
{
|
| 1739 |
-
"acc": 0.984,
|
| 1740 |
-
"precision": 0.9533980582524272,
|
| 1741 |
-
"recall": 0.9750354609929078,
|
| 1742 |
-
"f1": 0.9640953716690043
|
| 1743 |
-
}
|
| 1744 |
-
]
|
| 1745 |
-
}
|
| 1746 |
-
},
|
| 1747 |
-
"config": {
|
| 1748 |
-
"n_train": 2000,
|
| 1749 |
-
"n_test": 400,
|
| 1750 |
-
"hidden_dim": 64,
|
| 1751 |
-
"epochs": 80,
|
| 1752 |
-
"lr": 0.002,
|
| 1753 |
-
"max_hops": 3
|
| 1754 |
-
},
|
| 1755 |
-
"elapsed_min": 21.402417866388955
|
| 1756 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"graphs": {
|
| 3 |
+
"easy": {
|
| 4 |
+
"n_nodes": 12,
|
| 5 |
+
"n_edges": 10,
|
| 6 |
+
"gnn_final": {
|
| 7 |
+
"acc": 1.0,
|
| 8 |
+
"precision": 1.0,
|
| 9 |
+
"recall": 1.0,
|
| 10 |
+
"f1": 1.0
|
| 11 |
+
},
|
| 12 |
+
"baseline_direct_neighbors": {
|
| 13 |
+
"acc": 0.8258333333333333,
|
| 14 |
+
"precision": 1.0,
|
| 15 |
+
"recall": 0.6352530541012217,
|
| 16 |
+
"f1": 0.7769477054429028
|
| 17 |
+
},
|
| 18 |
+
"improvement_f1_pp": 22.305229455709718,
|
| 19 |
+
"train_loss_curve": [
|
| 20 |
+
0.10601958807871187,
|
| 21 |
+
0.00014574478766241308,
|
| 22 |
+
2.1336230871288145e-05,
|
| 23 |
+
5.904760447787133e-06,
|
| 24 |
+
0.014828034023753519,
|
| 25 |
+
0.0001365676538936252,
|
| 26 |
+
2.800940909035432e-05,
|
| 27 |
+
7.873948834791846e-06,
|
| 28 |
+
2.40824965675521e-06,
|
| 29 |
+
7.439197035413468e-07,
|
| 30 |
+
2.349434055591839e-07,
|
| 31 |
+
8.035365056026132e-08,
|
| 32 |
+
1.866763376779131e-08,
|
| 33 |
+
6.7128299592450774e-09,
|
| 34 |
+
3.606812599319898e-09,
|
| 35 |
+
2.4320182903440704e-09,
|
| 36 |
+
1.5445408799196548e-09,
|
| 37 |
+
0.03198392186360504,
|
| 38 |
+
1.3277981027858794e-05,
|
| 39 |
+
7.040849976128097e-06,
|
| 40 |
+
2.0380432214083175e-06,
|
| 41 |
+
5.154616233541851e-07,
|
| 42 |
+
0.017213296287886225,
|
| 43 |
+
0.00023569030925164338,
|
| 44 |
+
2.4805963813645227e-05,
|
| 45 |
+
6.058055528068272e-06,
|
| 46 |
+
1.8203820033098038e-06,
|
| 47 |
+
6.043328515907098e-07,
|
| 48 |
+
2.1225388103874568e-07,
|
| 49 |
+
7.437462508802039e-08,
|
| 50 |
+
1.902343076246039e-08,
|
| 51 |
+
6.527784956639485e-09,
|
| 52 |
+
3.3294667175720776e-09,
|
| 53 |
+
1.9615958442567566e-09,
|
| 54 |
+
0.010902570914775889,
|
| 55 |
+
2.806348171776314e-05,
|
| 56 |
+
7.667120790626038e-06,
|
| 57 |
+
2.582107717285551e-06,
|
| 58 |
+
9.129105348027232e-07,
|
| 59 |
+
3.106581481139294e-07,
|
| 60 |
+
1.0230859844032431e-07,
|
| 61 |
+
2.725160428237702e-08,
|
| 62 |
+
8.880124408068363e-09,
|
| 63 |
+
4.4200613740675046e-09,
|
| 64 |
+
2.8600379247657045e-09,
|
| 65 |
+
2.2151315261330923e-09,
|
| 66 |
+
1.7114610773887693e-09,
|
| 67 |
+
1.4000422095074408e-09,
|
| 68 |
+
1.0463116296276038e-09,
|
| 69 |
+
6.4079628731738e-10,
|
| 70 |
+
0.02516633728286725,
|
| 71 |
+
0.00012813284900565014,
|
| 72 |
+
2.3232634050379803e-05,
|
| 73 |
+
7.066120872802589e-06,
|
| 74 |
+
2.311430617913936e-06,
|
| 75 |
+
7.920952698295068e-07,
|
| 76 |
+
2.5278086959691613e-07,
|
| 77 |
+
7.818242851037627e-08,
|
| 78 |
+
1.983640248580842e-08,
|
| 79 |
+
7.863145182916767e-09,
|
| 80 |
+
5.0701508055233275e-09,
|
| 81 |
+
4.364776342121379e-09,
|
| 82 |
+
3.937454630286758e-09,
|
| 83 |
+
2.518706138457294e-09,
|
| 84 |
+
1.9815549914984234e-09,
|
| 85 |
+
0.018349960519401222,
|
| 86 |
+
7.85511791638533e-05,
|
| 87 |
+
2.0063992723006376e-05,
|
| 88 |
+
6.210748974664104e-06,
|
| 89 |
+
1.9043317207399904e-06,
|
| 90 |
+
6.112533347568437e-07,
|
| 91 |
+
2.0612900407184615e-07,
|
| 92 |
+
6.247272126631417e-08,
|
| 93 |
+
1.5818333928198573e-08,
|
| 94 |
+
5.678499110562204e-09,
|
| 95 |
+
2.927658185385007e-09,
|
| 96 |
+
2.2895658619235268e-09,
|
| 97 |
+
1.9812523096841366e-09,
|
| 98 |
+
1.418338779821114e-09,
|
| 99 |
+
9.94527561841937e-10
|
| 100 |
+
],
|
| 101 |
+
"test_metric_curve": [
|
| 102 |
+
{
|
| 103 |
+
"acc": 1.0,
|
| 104 |
+
"precision": 1.0,
|
| 105 |
+
"recall": 1.0,
|
| 106 |
+
"f1": 1.0
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"acc": 1.0,
|
| 110 |
+
"precision": 1.0,
|
| 111 |
+
"recall": 1.0,
|
| 112 |
+
"f1": 1.0
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"acc": 1.0,
|
| 116 |
+
"precision": 1.0,
|
| 117 |
+
"recall": 1.0,
|
| 118 |
+
"f1": 1.0
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"acc": 1.0,
|
| 122 |
+
"precision": 1.0,
|
| 123 |
+
"recall": 1.0,
|
| 124 |
+
"f1": 1.0
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"acc": 1.0,
|
| 128 |
+
"precision": 1.0,
|
| 129 |
+
"recall": 1.0,
|
| 130 |
+
"f1": 1.0
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"acc": 1.0,
|
| 134 |
+
"precision": 1.0,
|
| 135 |
+
"recall": 1.0,
|
| 136 |
+
"f1": 1.0
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"acc": 1.0,
|
| 140 |
+
"precision": 1.0,
|
| 141 |
+
"recall": 1.0,
|
| 142 |
+
"f1": 1.0
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"acc": 1.0,
|
| 146 |
+
"precision": 1.0,
|
| 147 |
+
"recall": 1.0,
|
| 148 |
+
"f1": 1.0
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"acc": 1.0,
|
| 152 |
+
"precision": 1.0,
|
| 153 |
+
"recall": 1.0,
|
| 154 |
+
"f1": 1.0
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"acc": 1.0,
|
| 158 |
+
"precision": 1.0,
|
| 159 |
+
"recall": 1.0,
|
| 160 |
+
"f1": 1.0
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"acc": 1.0,
|
| 164 |
+
"precision": 1.0,
|
| 165 |
+
"recall": 1.0,
|
| 166 |
+
"f1": 1.0
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"acc": 1.0,
|
| 170 |
+
"precision": 1.0,
|
| 171 |
+
"recall": 1.0,
|
| 172 |
+
"f1": 1.0
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"acc": 1.0,
|
| 176 |
+
"precision": 1.0,
|
| 177 |
+
"recall": 1.0,
|
| 178 |
+
"f1": 1.0
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"acc": 1.0,
|
| 182 |
+
"precision": 1.0,
|
| 183 |
+
"recall": 1.0,
|
| 184 |
+
"f1": 1.0
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"acc": 1.0,
|
| 188 |
+
"precision": 1.0,
|
| 189 |
+
"recall": 1.0,
|
| 190 |
+
"f1": 1.0
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"acc": 1.0,
|
| 194 |
+
"precision": 1.0,
|
| 195 |
+
"recall": 1.0,
|
| 196 |
+
"f1": 1.0
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"acc": 1.0,
|
| 200 |
+
"precision": 1.0,
|
| 201 |
+
"recall": 1.0,
|
| 202 |
+
"f1": 1.0
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"acc": 1.0,
|
| 206 |
+
"precision": 1.0,
|
| 207 |
+
"recall": 1.0,
|
| 208 |
+
"f1": 1.0
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"acc": 1.0,
|
| 212 |
+
"precision": 1.0,
|
| 213 |
+
"recall": 1.0,
|
| 214 |
+
"f1": 1.0
|
| 215 |
+
},
|
| 216 |
+
{
|
| 217 |
+
"acc": 1.0,
|
| 218 |
+
"precision": 1.0,
|
| 219 |
+
"recall": 1.0,
|
| 220 |
+
"f1": 1.0
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"acc": 1.0,
|
| 224 |
+
"precision": 1.0,
|
| 225 |
+
"recall": 1.0,
|
| 226 |
+
"f1": 1.0
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"acc": 1.0,
|
| 230 |
+
"precision": 1.0,
|
| 231 |
+
"recall": 1.0,
|
| 232 |
+
"f1": 1.0
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"acc": 1.0,
|
| 236 |
+
"precision": 1.0,
|
| 237 |
+
"recall": 1.0,
|
| 238 |
+
"f1": 1.0
|
| 239 |
+
},
|
| 240 |
+
{
|
| 241 |
+
"acc": 1.0,
|
| 242 |
+
"precision": 1.0,
|
| 243 |
+
"recall": 1.0,
|
| 244 |
+
"f1": 1.0
|
| 245 |
+
},
|
| 246 |
+
{
|
| 247 |
+
"acc": 1.0,
|
| 248 |
+
"precision": 1.0,
|
| 249 |
+
"recall": 1.0,
|
| 250 |
+
"f1": 1.0
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"acc": 1.0,
|
| 254 |
+
"precision": 1.0,
|
| 255 |
+
"recall": 1.0,
|
| 256 |
+
"f1": 1.0
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"acc": 1.0,
|
| 260 |
+
"precision": 1.0,
|
| 261 |
+
"recall": 1.0,
|
| 262 |
+
"f1": 1.0
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"acc": 1.0,
|
| 266 |
+
"precision": 1.0,
|
| 267 |
+
"recall": 1.0,
|
| 268 |
+
"f1": 1.0
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"acc": 1.0,
|
| 272 |
+
"precision": 1.0,
|
| 273 |
+
"recall": 1.0,
|
| 274 |
+
"f1": 1.0
|
| 275 |
+
},
|
| 276 |
+
{
|
| 277 |
+
"acc": 1.0,
|
| 278 |
+
"precision": 1.0,
|
| 279 |
+
"recall": 1.0,
|
| 280 |
+
"f1": 1.0
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"acc": 1.0,
|
| 284 |
+
"precision": 1.0,
|
| 285 |
+
"recall": 1.0,
|
| 286 |
+
"f1": 1.0
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"acc": 1.0,
|
| 290 |
+
"precision": 1.0,
|
| 291 |
+
"recall": 1.0,
|
| 292 |
+
"f1": 1.0
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"acc": 1.0,
|
| 296 |
+
"precision": 1.0,
|
| 297 |
+
"recall": 1.0,
|
| 298 |
+
"f1": 1.0
|
| 299 |
+
},
|
| 300 |
+
{
|
| 301 |
+
"acc": 1.0,
|
| 302 |
+
"precision": 1.0,
|
| 303 |
+
"recall": 1.0,
|
| 304 |
+
"f1": 1.0
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"acc": 1.0,
|
| 308 |
+
"precision": 1.0,
|
| 309 |
+
"recall": 1.0,
|
| 310 |
+
"f1": 1.0
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"acc": 1.0,
|
| 314 |
+
"precision": 1.0,
|
| 315 |
+
"recall": 1.0,
|
| 316 |
+
"f1": 1.0
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"acc": 1.0,
|
| 320 |
+
"precision": 1.0,
|
| 321 |
+
"recall": 1.0,
|
| 322 |
+
"f1": 1.0
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"acc": 1.0,
|
| 326 |
+
"precision": 1.0,
|
| 327 |
+
"recall": 1.0,
|
| 328 |
+
"f1": 1.0
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"acc": 1.0,
|
| 332 |
+
"precision": 1.0,
|
| 333 |
+
"recall": 1.0,
|
| 334 |
+
"f1": 1.0
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"acc": 1.0,
|
| 338 |
+
"precision": 1.0,
|
| 339 |
+
"recall": 1.0,
|
| 340 |
+
"f1": 1.0
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"acc": 1.0,
|
| 344 |
+
"precision": 1.0,
|
| 345 |
+
"recall": 1.0,
|
| 346 |
+
"f1": 1.0
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"acc": 1.0,
|
| 350 |
+
"precision": 1.0,
|
| 351 |
+
"recall": 1.0,
|
| 352 |
+
"f1": 1.0
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"acc": 1.0,
|
| 356 |
+
"precision": 1.0,
|
| 357 |
+
"recall": 1.0,
|
| 358 |
+
"f1": 1.0
|
| 359 |
+
},
|
| 360 |
+
{
|
| 361 |
+
"acc": 1.0,
|
| 362 |
+
"precision": 1.0,
|
| 363 |
+
"recall": 1.0,
|
| 364 |
+
"f1": 1.0
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"acc": 1.0,
|
| 368 |
+
"precision": 1.0,
|
| 369 |
+
"recall": 1.0,
|
| 370 |
+
"f1": 1.0
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"acc": 1.0,
|
| 374 |
+
"precision": 1.0,
|
| 375 |
+
"recall": 1.0,
|
| 376 |
+
"f1": 1.0
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"acc": 1.0,
|
| 380 |
+
"precision": 1.0,
|
| 381 |
+
"recall": 1.0,
|
| 382 |
+
"f1": 1.0
|
| 383 |
+
},
|
| 384 |
+
{
|
| 385 |
+
"acc": 1.0,
|
| 386 |
+
"precision": 1.0,
|
| 387 |
+
"recall": 1.0,
|
| 388 |
+
"f1": 1.0
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"acc": 1.0,
|
| 392 |
+
"precision": 1.0,
|
| 393 |
+
"recall": 1.0,
|
| 394 |
+
"f1": 1.0
|
| 395 |
+
},
|
| 396 |
+
{
|
| 397 |
+
"acc": 1.0,
|
| 398 |
+
"precision": 1.0,
|
| 399 |
+
"recall": 1.0,
|
| 400 |
+
"f1": 1.0
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"acc": 1.0,
|
| 404 |
+
"precision": 1.0,
|
| 405 |
+
"recall": 1.0,
|
| 406 |
+
"f1": 1.0
|
| 407 |
+
},
|
| 408 |
+
{
|
| 409 |
+
"acc": 1.0,
|
| 410 |
+
"precision": 1.0,
|
| 411 |
+
"recall": 1.0,
|
| 412 |
+
"f1": 1.0
|
| 413 |
+
},
|
| 414 |
+
{
|
| 415 |
+
"acc": 1.0,
|
| 416 |
+
"precision": 1.0,
|
| 417 |
+
"recall": 1.0,
|
| 418 |
+
"f1": 1.0
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"acc": 1.0,
|
| 422 |
+
"precision": 1.0,
|
| 423 |
+
"recall": 1.0,
|
| 424 |
+
"f1": 1.0
|
| 425 |
+
},
|
| 426 |
+
{
|
| 427 |
+
"acc": 1.0,
|
| 428 |
+
"precision": 1.0,
|
| 429 |
+
"recall": 1.0,
|
| 430 |
+
"f1": 1.0
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"acc": 1.0,
|
| 434 |
+
"precision": 1.0,
|
| 435 |
+
"recall": 1.0,
|
| 436 |
+
"f1": 1.0
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"acc": 1.0,
|
| 440 |
+
"precision": 1.0,
|
| 441 |
+
"recall": 1.0,
|
| 442 |
+
"f1": 1.0
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"acc": 1.0,
|
| 446 |
+
"precision": 1.0,
|
| 447 |
+
"recall": 1.0,
|
| 448 |
+
"f1": 1.0
|
| 449 |
+
},
|
| 450 |
+
{
|
| 451 |
+
"acc": 1.0,
|
| 452 |
+
"precision": 1.0,
|
| 453 |
+
"recall": 1.0,
|
| 454 |
+
"f1": 1.0
|
| 455 |
+
},
|
| 456 |
+
{
|
| 457 |
+
"acc": 1.0,
|
| 458 |
+
"precision": 1.0,
|
| 459 |
+
"recall": 1.0,
|
| 460 |
+
"f1": 1.0
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"acc": 1.0,
|
| 464 |
+
"precision": 1.0,
|
| 465 |
+
"recall": 1.0,
|
| 466 |
+
"f1": 1.0
|
| 467 |
+
},
|
| 468 |
+
{
|
| 469 |
+
"acc": 1.0,
|
| 470 |
+
"precision": 1.0,
|
| 471 |
+
"recall": 1.0,
|
| 472 |
+
"f1": 1.0
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"acc": 1.0,
|
| 476 |
+
"precision": 1.0,
|
| 477 |
+
"recall": 1.0,
|
| 478 |
+
"f1": 1.0
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"acc": 1.0,
|
| 482 |
+
"precision": 1.0,
|
| 483 |
+
"recall": 1.0,
|
| 484 |
+
"f1": 1.0
|
| 485 |
+
},
|
| 486 |
+
{
|
| 487 |
+
"acc": 1.0,
|
| 488 |
+
"precision": 1.0,
|
| 489 |
+
"recall": 1.0,
|
| 490 |
+
"f1": 1.0
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"acc": 1.0,
|
| 494 |
+
"precision": 1.0,
|
| 495 |
+
"recall": 1.0,
|
| 496 |
+
"f1": 1.0
|
| 497 |
+
},
|
| 498 |
+
{
|
| 499 |
+
"acc": 1.0,
|
| 500 |
+
"precision": 1.0,
|
| 501 |
+
"recall": 1.0,
|
| 502 |
+
"f1": 1.0
|
| 503 |
+
},
|
| 504 |
+
{
|
| 505 |
+
"acc": 1.0,
|
| 506 |
+
"precision": 1.0,
|
| 507 |
+
"recall": 1.0,
|
| 508 |
+
"f1": 1.0
|
| 509 |
+
},
|
| 510 |
+
{
|
| 511 |
+
"acc": 1.0,
|
| 512 |
+
"precision": 1.0,
|
| 513 |
+
"recall": 1.0,
|
| 514 |
+
"f1": 1.0
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"acc": 1.0,
|
| 518 |
+
"precision": 1.0,
|
| 519 |
+
"recall": 1.0,
|
| 520 |
+
"f1": 1.0
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"acc": 1.0,
|
| 524 |
+
"precision": 1.0,
|
| 525 |
+
"recall": 1.0,
|
| 526 |
+
"f1": 1.0
|
| 527 |
+
},
|
| 528 |
+
{
|
| 529 |
+
"acc": 1.0,
|
| 530 |
+
"precision": 1.0,
|
| 531 |
+
"recall": 1.0,
|
| 532 |
+
"f1": 1.0
|
| 533 |
+
},
|
| 534 |
+
{
|
| 535 |
+
"acc": 1.0,
|
| 536 |
+
"precision": 1.0,
|
| 537 |
+
"recall": 1.0,
|
| 538 |
+
"f1": 1.0
|
| 539 |
+
},
|
| 540 |
+
{
|
| 541 |
+
"acc": 1.0,
|
| 542 |
+
"precision": 1.0,
|
| 543 |
+
"recall": 1.0,
|
| 544 |
+
"f1": 1.0
|
| 545 |
+
},
|
| 546 |
+
{
|
| 547 |
+
"acc": 1.0,
|
| 548 |
+
"precision": 1.0,
|
| 549 |
+
"recall": 1.0,
|
| 550 |
+
"f1": 1.0
|
| 551 |
+
},
|
| 552 |
+
{
|
| 553 |
+
"acc": 1.0,
|
| 554 |
+
"precision": 1.0,
|
| 555 |
+
"recall": 1.0,
|
| 556 |
+
"f1": 1.0
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"acc": 1.0,
|
| 560 |
+
"precision": 1.0,
|
| 561 |
+
"recall": 1.0,
|
| 562 |
+
"f1": 1.0
|
| 563 |
+
},
|
| 564 |
+
{
|
| 565 |
+
"acc": 1.0,
|
| 566 |
+
"precision": 1.0,
|
| 567 |
+
"recall": 1.0,
|
| 568 |
+
"f1": 1.0
|
| 569 |
+
},
|
| 570 |
+
{
|
| 571 |
+
"acc": 1.0,
|
| 572 |
+
"precision": 1.0,
|
| 573 |
+
"recall": 1.0,
|
| 574 |
+
"f1": 1.0
|
| 575 |
+
},
|
| 576 |
+
{
|
| 577 |
+
"acc": 1.0,
|
| 578 |
+
"precision": 1.0,
|
| 579 |
+
"recall": 1.0,
|
| 580 |
+
"f1": 1.0
|
| 581 |
+
}
|
| 582 |
+
]
|
| 583 |
+
},
|
| 584 |
+
"medium": {
|
| 585 |
+
"n_nodes": 25,
|
| 586 |
+
"n_edges": 27,
|
| 587 |
+
"gnn_final": {
|
| 588 |
+
"acc": 0.9914,
|
| 589 |
+
"precision": 0.982778750729714,
|
| 590 |
+
"recall": 0.9920447849145551,
|
| 591 |
+
"f1": 0.9873900293255131
|
| 592 |
+
},
|
| 593 |
+
"baseline_direct_neighbors": {
|
| 594 |
+
"acc": 0.8301,
|
| 595 |
+
"precision": 1.0,
|
| 596 |
+
"recall": 0.4994107248084856,
|
| 597 |
+
"f1": 0.6661426606405974
|
| 598 |
+
},
|
| 599 |
+
"improvement_f1_pp": 32.124736868491574,
|
| 600 |
+
"train_loss_curve": [
|
| 601 |
+
0.18512494587464606,
|
| 602 |
+
0.05774239192842651,
|
| 603 |
+
0.04035148839658183,
|
| 604 |
+
0.03685507851154424,
|
| 605 |
+
0.034016887983169666,
|
| 606 |
+
0.03193854558186021,
|
| 607 |
+
0.030314448321928544,
|
| 608 |
+
0.028890588828011224,
|
| 609 |
+
0.02627120438580584,
|
| 610 |
+
0.02676936000857496,
|
| 611 |
+
0.02735587336003725,
|
| 612 |
+
0.024704556535801756,
|
| 613 |
+
0.023389738032454397,
|
| 614 |
+
0.02484239745095036,
|
| 615 |
+
0.022598365899086623,
|
| 616 |
+
0.022097759216314333,
|
| 617 |
+
0.021880711925624425,
|
| 618 |
+
0.023672257099118552,
|
| 619 |
+
0.021815840122002862,
|
| 620 |
+
0.021538631150760885,
|
| 621 |
+
0.021590486920307173,
|
| 622 |
+
0.020993219244996,
|
| 623 |
+
0.021660113581202914,
|
| 624 |
+
0.02028199757042485,
|
| 625 |
+
0.021449406110984975,
|
| 626 |
+
0.02049649202735325,
|
| 627 |
+
0.02005596899437715,
|
| 628 |
+
0.02060316097080978,
|
| 629 |
+
0.02082035162168178,
|
| 630 |
+
0.020935066080168856,
|
| 631 |
+
0.0209964800781561,
|
| 632 |
+
0.019652295691733542,
|
| 633 |
+
0.020470858438760543,
|
| 634 |
+
0.020456047435481396,
|
| 635 |
+
0.020529603496513553,
|
| 636 |
+
0.019996260003822708,
|
| 637 |
+
0.021328506347361064,
|
| 638 |
+
0.019778630244522907,
|
| 639 |
+
0.01971426555108731,
|
| 640 |
+
0.019847191254493045,
|
| 641 |
+
0.01984119418810368,
|
| 642 |
+
0.02021396374486143,
|
| 643 |
+
0.01946370021810413,
|
| 644 |
+
0.019111871498224214,
|
| 645 |
+
0.019667785586758944,
|
| 646 |
+
0.021675049597691873,
|
| 647 |
+
0.01897557202284267,
|
| 648 |
+
0.01971483370839516,
|
| 649 |
+
0.01965866965101487,
|
| 650 |
+
0.01936112277971507,
|
| 651 |
+
0.01895255452432814,
|
| 652 |
+
0.02035098125927439,
|
| 653 |
+
0.01909720691408324,
|
| 654 |
+
0.019500281907226687,
|
| 655 |
+
0.019117790717674256,
|
| 656 |
+
0.018927754213147425,
|
| 657 |
+
0.020313845976115717,
|
| 658 |
+
0.019341792678655486,
|
| 659 |
+
0.01890229735773205,
|
| 660 |
+
0.019833170414518056,
|
| 661 |
+
0.01948640772390163,
|
| 662 |
+
0.019305320678627013,
|
| 663 |
+
0.019213381035159603,
|
| 664 |
+
0.020478221997059808,
|
| 665 |
+
0.01936127331570382,
|
| 666 |
+
0.019158014420631225,
|
| 667 |
+
0.019090143173694583,
|
| 668 |
+
0.020291763241906225,
|
| 669 |
+
0.01900654871721499,
|
| 670 |
+
0.019815083033949698,
|
| 671 |
+
0.019103285589502736,
|
| 672 |
+
0.018360809753397392,
|
| 673 |
+
0.019985065603578676,
|
| 674 |
+
0.01858524212906661,
|
| 675 |
+
0.02056734084818314,
|
| 676 |
+
0.01856864124721938,
|
| 677 |
+
0.01852369899036554,
|
| 678 |
+
0.018906581267301003,
|
| 679 |
+
0.01927234342475787,
|
| 680 |
+
0.018721831301170885
|
| 681 |
+
],
|
| 682 |
+
"test_metric_curve": [
|
| 683 |
+
{
|
| 684 |
+
"acc": 0.9816,
|
| 685 |
+
"precision": 0.9819819819819819,
|
| 686 |
+
"recall": 0.9634649381261049,
|
| 687 |
+
"f1": 0.9726353361094586
|
| 688 |
+
},
|
| 689 |
+
{
|
| 690 |
+
"acc": 0.9885,
|
| 691 |
+
"precision": 0.9742551345096905,
|
| 692 |
+
"recall": 0.9923394225103123,
|
| 693 |
+
"f1": 0.9832141293241862
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"acc": 0.988,
|
| 697 |
+
"precision": 0.9720299884659747,
|
| 698 |
+
"recall": 0.993223335297584,
|
| 699 |
+
"f1": 0.9825123870591663
|
| 700 |
+
},
|
| 701 |
+
{
|
| 702 |
+
"acc": 0.9892,
|
| 703 |
+
"precision": 0.986094674556213,
|
| 704 |
+
"recall": 0.9820271066588097,
|
| 705 |
+
"f1": 0.9840566873339238
|
| 706 |
+
},
|
| 707 |
+
{
|
| 708 |
+
"acc": 0.9916,
|
| 709 |
+
"precision": 0.9825072886297376,
|
| 710 |
+
"recall": 0.9929286977018268,
|
| 711 |
+
"f1": 0.9876905041031652
|
| 712 |
+
},
|
| 713 |
+
{
|
| 714 |
+
"acc": 0.9913,
|
| 715 |
+
"precision": 0.9824919754887657,
|
| 716 |
+
"recall": 0.9920447849145551,
|
| 717 |
+
"f1": 0.9872452719542588
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"acc": 0.9909,
|
| 721 |
+
"precision": 0.9847373055474024,
|
| 722 |
+
"recall": 0.9885091337654685,
|
| 723 |
+
"f1": 0.9866196147625349
|
| 724 |
+
},
|
| 725 |
+
{
|
| 726 |
+
"acc": 0.9857,
|
| 727 |
+
"precision": 0.9954282231027126,
|
| 728 |
+
"recall": 0.9622863877430761,
|
| 729 |
+
"f1": 0.9785767790262172
|
| 730 |
+
},
|
| 731 |
+
{
|
| 732 |
+
"acc": 0.9882,
|
| 733 |
+
"precision": 0.9761627906976744,
|
| 734 |
+
"recall": 0.9893930465527401,
|
| 735 |
+
"f1": 0.9827333918642083
|
| 736 |
+
},
|
| 737 |
+
{
|
| 738 |
+
"acc": 0.9912,
|
| 739 |
+
"precision": 0.9833333333333333,
|
| 740 |
+
"recall": 0.9908662345315262,
|
| 741 |
+
"f1": 0.9870854123862635
|
| 742 |
+
},
|
| 743 |
+
{
|
| 744 |
+
"acc": 0.9911,
|
| 745 |
+
"precision": 0.9864586399764498,
|
| 746 |
+
"recall": 0.9873305833824396,
|
| 747 |
+
"f1": 0.9868944190840818
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"acc": 0.9842,
|
| 751 |
+
"precision": 0.997539975399754,
|
| 752 |
+
"recall": 0.9558043606364172,
|
| 753 |
+
"f1": 0.9762263015347576
|
| 754 |
+
},
|
| 755 |
+
{
|
| 756 |
+
"acc": 0.9872,
|
| 757 |
+
"precision": 0.9936517533252721,
|
| 758 |
+
"recall": 0.9684737772539777,
|
| 759 |
+
"f1": 0.9809012235153686
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"acc": 0.9919,
|
| 763 |
+
"precision": 0.9825225750072822,
|
| 764 |
+
"recall": 0.9938126104890984,
|
| 765 |
+
"f1": 0.9881353449538597
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"acc": 0.9905,
|
| 769 |
+
"precision": 0.9864346800353878,
|
| 770 |
+
"recall": 0.9855627578078963,
|
| 771 |
+
"f1": 0.9859985261606485
|
| 772 |
+
},
|
| 773 |
+
{
|
| 774 |
+
"acc": 0.9903,
|
| 775 |
+
"precision": 0.9867139061116031,
|
| 776 |
+
"recall": 0.9846788450206246,
|
| 777 |
+
"f1": 0.9856953251732783
|
| 778 |
+
},
|
| 779 |
+
{
|
| 780 |
+
"acc": 0.9912,
|
| 781 |
+
"precision": 0.9833333333333333,
|
| 782 |
+
"recall": 0.9908662345315262,
|
| 783 |
+
"f1": 0.9870854123862635
|
| 784 |
+
},
|
| 785 |
+
{
|
| 786 |
+
"acc": 0.9917,
|
| 787 |
+
"precision": 0.9827938174394867,
|
| 788 |
+
"recall": 0.9929286977018268,
|
| 789 |
+
"f1": 0.9878352630807563
|
| 790 |
+
},
|
| 791 |
+
{
|
| 792 |
+
"acc": 0.9914,
|
| 793 |
+
"precision": 0.9822157434402332,
|
| 794 |
+
"recall": 0.9926340601060696,
|
| 795 |
+
"f1": 0.9873974208675265
|
| 796 |
+
},
|
| 797 |
+
{
|
| 798 |
+
"acc": 0.9914,
|
| 799 |
+
"precision": 0.9833430742255991,
|
| 800 |
+
"recall": 0.9914555097230406,
|
| 801 |
+
"f1": 0.9873826291079812
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"acc": 0.9908,
|
| 805 |
+
"precision": 0.986446670595168,
|
| 806 |
+
"recall": 0.986446670595168,
|
| 807 |
+
"f1": 0.986446670595168
|
| 808 |
+
},
|
| 809 |
+
{
|
| 810 |
+
"acc": 0.9908,
|
| 811 |
+
"precision": 0.986446670595168,
|
| 812 |
+
"recall": 0.986446670595168,
|
| 813 |
+
"f1": 0.986446670595168
|
| 814 |
+
},
|
| 815 |
+
{
|
| 816 |
+
"acc": 0.9909,
|
| 817 |
+
"precision": 0.9858781994704324,
|
| 818 |
+
"recall": 0.9873305833824396,
|
| 819 |
+
"f1": 0.9866038569115266
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"acc": 0.9912,
|
| 823 |
+
"precision": 0.9833333333333333,
|
| 824 |
+
"recall": 0.9908662345315262,
|
| 825 |
+
"f1": 0.9870854123862635
|
| 826 |
+
},
|
| 827 |
+
{
|
| 828 |
+
"acc": 0.9915,
|
| 829 |
+
"precision": 0.9827837758972863,
|
| 830 |
+
"recall": 0.9923394225103123,
|
| 831 |
+
"f1": 0.9875384840932414
|
| 832 |
+
},
|
| 833 |
+
{
|
| 834 |
+
"acc": 0.9907,
|
| 835 |
+
"precision": 0.9873043991733097,
|
| 836 |
+
"recall": 0.985268120212139,
|
| 837 |
+
"f1": 0.9862852086712873
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"acc": 0.9919,
|
| 841 |
+
"precision": 0.9825225750072822,
|
| 842 |
+
"recall": 0.9938126104890984,
|
| 843 |
+
"f1": 0.9881353449538597
|
| 844 |
+
},
|
| 845 |
+
{
|
| 846 |
+
"acc": 0.9914,
|
| 847 |
+
"precision": 0.982778750729714,
|
| 848 |
+
"recall": 0.9920447849145551,
|
| 849 |
+
"f1": 0.9873900293255131
|
| 850 |
+
},
|
| 851 |
+
{
|
| 852 |
+
"acc": 0.9916,
|
| 853 |
+
"precision": 0.9777713625866051,
|
| 854 |
+
"recall": 0.9979375368296994,
|
| 855 |
+
"f1": 0.9877515310586177
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"acc": 0.9901,
|
| 859 |
+
"precision": 0.9869937924918711,
|
| 860 |
+
"recall": 0.983794932233353,
|
| 861 |
+
"f1": 0.9853917662682603
|
| 862 |
+
},
|
| 863 |
+
{
|
| 864 |
+
"acc": 0.9914,
|
| 865 |
+
"precision": 0.982778750729714,
|
| 866 |
+
"recall": 0.9920447849145551,
|
| 867 |
+
"f1": 0.9873900293255131
|
| 868 |
+
},
|
| 869 |
+
{
|
| 870 |
+
"acc": 0.9904,
|
| 871 |
+
"precision": 0.9872931442080378,
|
| 872 |
+
"recall": 0.9843842074248674,
|
| 873 |
+
"f1": 0.9858365299498378
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"acc": 0.9914,
|
| 877 |
+
"precision": 0.982778750729714,
|
| 878 |
+
"recall": 0.9920447849145551,
|
| 879 |
+
"f1": 0.9873900293255131
|
| 880 |
+
},
|
| 881 |
+
{
|
| 882 |
+
"acc": 0.9887,
|
| 883 |
+
"precision": 0.993680409268733,
|
| 884 |
+
"recall": 0.9728933411903359,
|
| 885 |
+
"f1": 0.9831770135477147
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"acc": 0.9912,
|
| 889 |
+
"precision": 0.9833333333333333,
|
| 890 |
+
"recall": 0.9908662345315262,
|
| 891 |
+
"f1": 0.9870854123862635
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"acc": 0.9913,
|
| 895 |
+
"precision": 0.983338205203157,
|
| 896 |
+
"recall": 0.9911608721272834,
|
| 897 |
+
"f1": 0.9872340425531914
|
| 898 |
+
},
|
| 899 |
+
{
|
| 900 |
+
"acc": 0.9915,
|
| 901 |
+
"precision": 0.9827837758972863,
|
| 902 |
+
"recall": 0.9923394225103123,
|
| 903 |
+
"f1": 0.9875384840932414
|
| 904 |
+
},
|
| 905 |
+
{
|
| 906 |
+
"acc": 0.991,
|
| 907 |
+
"precision": 0.9858823529411764,
|
| 908 |
+
"recall": 0.9876252209781968,
|
| 909 |
+
"f1": 0.986753017368266
|
| 910 |
+
},
|
| 911 |
+
{
|
| 912 |
+
"acc": 0.9905,
|
| 913 |
+
"precision": 0.9870091526424565,
|
| 914 |
+
"recall": 0.9849734826163818,
|
| 915 |
+
"f1": 0.9859902669222829
|
| 916 |
+
},
|
| 917 |
+
{
|
| 918 |
+
"acc": 0.9912,
|
| 919 |
+
"precision": 0.9830508474576272,
|
| 920 |
+
"recall": 0.9911608721272834,
|
| 921 |
+
"f1": 0.9870892018779343
|
| 922 |
+
},
|
| 923 |
+
{
|
| 924 |
+
"acc": 0.9911,
|
| 925 |
+
"precision": 0.9822001750802452,
|
| 926 |
+
"recall": 0.9917501473187978,
|
| 927 |
+
"f1": 0.9869520598152763
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"acc": 0.9901,
|
| 931 |
+
"precision": 0.9887273805992287,
|
| 932 |
+
"recall": 0.9820271066588097,
|
| 933 |
+
"f1": 0.9853658536585367
|
| 934 |
+
},
|
| 935 |
+
{
|
| 936 |
+
"acc": 0.9914,
|
| 937 |
+
"precision": 0.982778750729714,
|
| 938 |
+
"recall": 0.9920447849145551,
|
| 939 |
+
"f1": 0.9873900293255131
|
| 940 |
+
},
|
| 941 |
+
{
|
| 942 |
+
"acc": 0.9907,
|
| 943 |
+
"precision": 0.9833089311859443,
|
| 944 |
+
"recall": 0.9893930465527401,
|
| 945 |
+
"f1": 0.9863416066970185
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"acc": 0.9914,
|
| 949 |
+
"precision": 0.982778750729714,
|
| 950 |
+
"recall": 0.9920447849145551,
|
| 951 |
+
"f1": 0.9873900293255131
|
| 952 |
+
},
|
| 953 |
+
{
|
| 954 |
+
"acc": 0.9908,
|
| 955 |
+
"precision": 0.986446670595168,
|
| 956 |
+
"recall": 0.986446670595168,
|
| 957 |
+
"f1": 0.986446670595168
|
| 958 |
+
},
|
| 959 |
+
{
|
| 960 |
+
"acc": 0.991,
|
| 961 |
+
"precision": 0.9833235810415447,
|
| 962 |
+
"recall": 0.9902769593400118,
|
| 963 |
+
"f1": 0.9867880211391661
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"acc": 0.9912,
|
| 967 |
+
"precision": 0.9833333333333333,
|
| 968 |
+
"recall": 0.9908662345315262,
|
| 969 |
+
"f1": 0.9870854123862635
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"acc": 0.9912,
|
| 973 |
+
"precision": 0.9824868651488616,
|
| 974 |
+
"recall": 0.9917501473187978,
|
| 975 |
+
"f1": 0.9870967741935485
|
| 976 |
+
},
|
| 977 |
+
{
|
| 978 |
+
"acc": 0.9909,
|
| 979 |
+
"precision": 0.9838851450336947,
|
| 980 |
+
"recall": 0.9893930465527401,
|
| 981 |
+
"f1": 0.9866314088438372
|
| 982 |
+
},
|
| 983 |
+
{
|
| 984 |
+
"acc": 0.9911,
|
| 985 |
+
"precision": 0.9833284586136297,
|
| 986 |
+
"recall": 0.990571596935769,
|
| 987 |
+
"f1": 0.9869367385879936
|
| 988 |
+
},
|
| 989 |
+
{
|
| 990 |
+
"acc": 0.9913,
|
| 991 |
+
"precision": 0.9836209417958467,
|
| 992 |
+
"recall": 0.9908662345315262,
|
| 993 |
+
"f1": 0.9872302950242183
|
| 994 |
+
},
|
| 995 |
+
{
|
| 996 |
+
"acc": 0.9914,
|
| 997 |
+
"precision": 0.982778750729714,
|
| 998 |
+
"recall": 0.9920447849145551,
|
| 999 |
+
"f1": 0.9873900293255131
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"acc": 0.991,
|
| 1003 |
+
"precision": 0.9858823529411764,
|
| 1004 |
+
"recall": 0.9876252209781968,
|
| 1005 |
+
"f1": 0.986753017368266
|
| 1006 |
+
},
|
| 1007 |
+
{
|
| 1008 |
+
"acc": 0.9912,
|
| 1009 |
+
"precision": 0.9830508474576272,
|
| 1010 |
+
"recall": 0.9911608721272834,
|
| 1011 |
+
"f1": 0.9870892018779343
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"acc": 0.9914,
|
| 1015 |
+
"precision": 0.982778750729714,
|
| 1016 |
+
"recall": 0.9920447849145551,
|
| 1017 |
+
"f1": 0.9873900293255131
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"acc": 0.9899,
|
| 1021 |
+
"precision": 0.9875629256736749,
|
| 1022 |
+
"recall": 0.9826163818503241,
|
| 1023 |
+
"f1": 0.9850834440998375
|
| 1024 |
+
},
|
| 1025 |
+
{
|
| 1026 |
+
"acc": 0.9908,
|
| 1027 |
+
"precision": 0.986446670595168,
|
| 1028 |
+
"recall": 0.986446670595168,
|
| 1029 |
+
"f1": 0.986446670595168
|
| 1030 |
+
},
|
| 1031 |
+
{
|
| 1032 |
+
"acc": 0.9915,
|
| 1033 |
+
"precision": 0.9819399941741916,
|
| 1034 |
+
"recall": 0.993223335297584,
|
| 1035 |
+
"f1": 0.9875494360626923
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"acc": 0.9914,
|
| 1039 |
+
"precision": 0.982778750729714,
|
| 1040 |
+
"recall": 0.9920447849145551,
|
| 1041 |
+
"f1": 0.9873900293255131
|
| 1042 |
+
},
|
| 1043 |
+
{
|
| 1044 |
+
"acc": 0.9906,
|
| 1045 |
+
"precision": 0.987012987012987,
|
| 1046 |
+
"recall": 0.985268120212139,
|
| 1047 |
+
"f1": 0.9861397817752875
|
| 1048 |
+
},
|
| 1049 |
+
{
|
| 1050 |
+
"acc": 0.9908,
|
| 1051 |
+
"precision": 0.986446670595168,
|
| 1052 |
+
"recall": 0.986446670595168,
|
| 1053 |
+
"f1": 0.986446670595168
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"acc": 0.991,
|
| 1057 |
+
"precision": 0.9833235810415447,
|
| 1058 |
+
"recall": 0.9902769593400118,
|
| 1059 |
+
"f1": 0.9867880211391661
|
| 1060 |
+
},
|
| 1061 |
+
{
|
| 1062 |
+
"acc": 0.9907,
|
| 1063 |
+
"precision": 0.9864426760978485,
|
| 1064 |
+
"recall": 0.9861520329994107,
|
| 1065 |
+
"f1": 0.9862973331368794
|
| 1066 |
+
},
|
| 1067 |
+
{
|
| 1068 |
+
"acc": 0.9912,
|
| 1069 |
+
"precision": 0.9824868651488616,
|
| 1070 |
+
"recall": 0.9917501473187978,
|
| 1071 |
+
"f1": 0.9870967741935485
|
| 1072 |
+
},
|
| 1073 |
+
{
|
| 1074 |
+
"acc": 0.9911,
|
| 1075 |
+
"precision": 0.9833284586136297,
|
| 1076 |
+
"recall": 0.990571596935769,
|
| 1077 |
+
"f1": 0.9869367385879936
|
| 1078 |
+
},
|
| 1079 |
+
{
|
| 1080 |
+
"acc": 0.9908,
|
| 1081 |
+
"precision": 0.986446670595168,
|
| 1082 |
+
"recall": 0.986446670595168,
|
| 1083 |
+
"f1": 0.986446670595168
|
| 1084 |
+
},
|
| 1085 |
+
{
|
| 1086 |
+
"acc": 0.9914,
|
| 1087 |
+
"precision": 0.982778750729714,
|
| 1088 |
+
"recall": 0.9920447849145551,
|
| 1089 |
+
"f1": 0.9873900293255131
|
| 1090 |
+
},
|
| 1091 |
+
{
|
| 1092 |
+
"acc": 0.9914,
|
| 1093 |
+
"precision": 0.982778750729714,
|
| 1094 |
+
"recall": 0.9920447849145551,
|
| 1095 |
+
"f1": 0.9873900293255131
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"acc": 0.9916,
|
| 1099 |
+
"precision": 0.9825072886297376,
|
| 1100 |
+
"recall": 0.9929286977018268,
|
| 1101 |
+
"f1": 0.9876905041031652
|
| 1102 |
+
},
|
| 1103 |
+
{
|
| 1104 |
+
"acc": 0.9914,
|
| 1105 |
+
"precision": 0.982778750729714,
|
| 1106 |
+
"recall": 0.9920447849145551,
|
| 1107 |
+
"f1": 0.9873900293255131
|
| 1108 |
+
},
|
| 1109 |
+
{
|
| 1110 |
+
"acc": 0.9914,
|
| 1111 |
+
"precision": 0.982778750729714,
|
| 1112 |
+
"recall": 0.9920447849145551,
|
| 1113 |
+
"f1": 0.9873900293255131
|
| 1114 |
+
},
|
| 1115 |
+
{
|
| 1116 |
+
"acc": 0.9913,
|
| 1117 |
+
"precision": 0.9824919754887657,
|
| 1118 |
+
"recall": 0.9920447849145551,
|
| 1119 |
+
"f1": 0.9872452719542588
|
| 1120 |
+
},
|
| 1121 |
+
{
|
| 1122 |
+
"acc": 0.9915,
|
| 1123 |
+
"precision": 0.9827837758972863,
|
| 1124 |
+
"recall": 0.9923394225103123,
|
| 1125 |
+
"f1": 0.9875384840932414
|
| 1126 |
+
},
|
| 1127 |
+
{
|
| 1128 |
+
"acc": 0.9916,
|
| 1129 |
+
"precision": 0.9827887981330222,
|
| 1130 |
+
"recall": 0.9926340601060696,
|
| 1131 |
+
"f1": 0.9876868953386104
|
| 1132 |
+
},
|
| 1133 |
+
{
|
| 1134 |
+
"acc": 0.9912,
|
| 1135 |
+
"precision": 0.982768691588785,
|
| 1136 |
+
"recall": 0.9914555097230406,
|
| 1137 |
+
"f1": 0.9870929891463771
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"acc": 0.9909,
|
| 1141 |
+
"precision": 0.9833187006145742,
|
| 1142 |
+
"recall": 0.9899823217442546,
|
| 1143 |
+
"f1": 0.986639260020555
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"acc": 0.9904,
|
| 1147 |
+
"precision": 0.987005316007088,
|
| 1148 |
+
"recall": 0.9846788450206246,
|
| 1149 |
+
"f1": 0.9858407079646017
|
| 1150 |
+
},
|
| 1151 |
+
{
|
| 1152 |
+
"acc": 0.9912,
|
| 1153 |
+
"precision": 0.982768691588785,
|
| 1154 |
+
"recall": 0.9914555097230406,
|
| 1155 |
+
"f1": 0.9870929891463771
|
| 1156 |
+
},
|
| 1157 |
+
{
|
| 1158 |
+
"acc": 0.9914,
|
| 1159 |
+
"precision": 0.982778750729714,
|
| 1160 |
+
"recall": 0.9920447849145551,
|
| 1161 |
+
"f1": 0.9873900293255131
|
| 1162 |
+
}
|
| 1163 |
+
]
|
| 1164 |
+
},
|
| 1165 |
+
"hard": {
|
| 1166 |
+
"n_nodes": 40,
|
| 1167 |
+
"n_edges": 44,
|
| 1168 |
+
"gnn_final": {
|
| 1169 |
+
"acc": 0.984,
|
| 1170 |
+
"precision": 0.9533980582524272,
|
| 1171 |
+
"recall": 0.9750354609929078,
|
| 1172 |
+
"f1": 0.9640953716690043
|
| 1173 |
+
},
|
| 1174 |
+
"baseline_direct_neighbors": {
|
| 1175 |
+
"acc": 0.88875,
|
| 1176 |
+
"precision": 1.0,
|
| 1177 |
+
"recall": 0.4950354609929078,
|
| 1178 |
+
"f1": 0.6622390891840607
|
| 1179 |
+
},
|
| 1180 |
+
"improvement_f1_pp": 30.185628248494357,
|
| 1181 |
+
"train_loss_curve": [
|
| 1182 |
+
0.15102637716173195,
|
| 1183 |
+
0.052633647776499856,
|
| 1184 |
+
0.04379157433440559,
|
| 1185 |
+
0.04003102573152864,
|
| 1186 |
+
0.03876525610721728,
|
| 1187 |
+
0.0369047760956164,
|
| 1188 |
+
0.036530632421345216,
|
| 1189 |
+
0.035830124779022296,
|
| 1190 |
+
0.0349417570647056,
|
| 1191 |
+
0.035263367522318734,
|
| 1192 |
+
0.03485661885762238,
|
| 1193 |
+
0.03493121563128079,
|
| 1194 |
+
0.032977926293009656,
|
| 1195 |
+
0.03394761107103841,
|
| 1196 |
+
0.033683306101149356,
|
| 1197 |
+
0.033089775294763965,
|
| 1198 |
+
0.0335856751325955,
|
| 1199 |
+
0.03272933466515315,
|
| 1200 |
+
0.032765767610715556,
|
| 1201 |
+
0.032717534617419004,
|
| 1202 |
+
0.03298612758413583,
|
| 1203 |
+
0.03169301031356008,
|
| 1204 |
+
0.0323142114428847,
|
| 1205 |
+
0.03186470089994691,
|
| 1206 |
+
0.032041587697027356,
|
| 1207 |
+
0.03211515340814367,
|
| 1208 |
+
0.032251973500227904,
|
| 1209 |
+
0.031999882343730864,
|
| 1210 |
+
0.03164813786187369,
|
| 1211 |
+
0.03160676156320551,
|
| 1212 |
+
0.031426732700598224,
|
| 1213 |
+
0.031241096474510413,
|
| 1214 |
+
0.03162557367896079,
|
| 1215 |
+
0.03154335625256863,
|
| 1216 |
+
0.03165931336190261,
|
| 1217 |
+
0.03097459732750576,
|
| 1218 |
+
0.03131493923773814,
|
| 1219 |
+
0.0311658642354123,
|
| 1220 |
+
0.030633534374135706,
|
| 1221 |
+
0.031252258909702506,
|
| 1222 |
+
0.030825211223787848,
|
| 1223 |
+
0.03053342323340803,
|
| 1224 |
+
0.030733022628217442,
|
| 1225 |
+
0.030747544990059397,
|
| 1226 |
+
0.030629911747484584,
|
| 1227 |
+
0.030457735169680745,
|
| 1228 |
+
0.03058615475141687,
|
| 1229 |
+
0.030597560634826552,
|
| 1230 |
+
0.030619746312839653,
|
| 1231 |
+
0.03066707000986935,
|
| 1232 |
+
0.03048766604950197,
|
| 1233 |
+
0.030287153372872126,
|
| 1234 |
+
0.0303783905812179,
|
| 1235 |
+
0.030595246432494606,
|
| 1236 |
+
0.03037994001944753,
|
| 1237 |
+
0.030246819483697437,
|
| 1238 |
+
0.03012882444020579,
|
| 1239 |
+
0.03024448805347947,
|
| 1240 |
+
0.030449683469725642,
|
| 1241 |
+
0.03048290506813919,
|
| 1242 |
+
0.030136575797458136,
|
| 1243 |
+
0.02994714516170643,
|
| 1244 |
+
0.030466000927322056,
|
| 1245 |
+
0.03019473605195526,
|
| 1246 |
+
0.02987939404982535,
|
| 1247 |
+
0.030137449657182513,
|
| 1248 |
+
0.030104370625325828,
|
| 1249 |
+
0.030588962311178875,
|
| 1250 |
+
0.029767145353838714,
|
| 1251 |
+
0.030284092916966984,
|
| 1252 |
+
0.03002391016312413,
|
| 1253 |
+
0.02992785992539757,
|
| 1254 |
+
0.030997538813613574,
|
| 1255 |
+
0.029848512160238896,
|
| 1256 |
+
0.030022954882957493,
|
| 1257 |
+
0.030052907403214705,
|
| 1258 |
+
0.02975074222330568,
|
| 1259 |
+
0.029870129619877842,
|
| 1260 |
+
0.02968558935528563,
|
| 1261 |
+
0.029977637300933564
|
| 1262 |
+
],
|
| 1263 |
+
"test_metric_curve": [
|
| 1264 |
+
{
|
| 1265 |
+
"acc": 0.978625,
|
| 1266 |
+
"precision": 0.9395194697597349,
|
| 1267 |
+
"recall": 0.9651063829787234,
|
| 1268 |
+
"f1": 0.9521410579345089
|
| 1269 |
+
},
|
| 1270 |
+
{
|
| 1271 |
+
"acc": 0.9813125,
|
| 1272 |
+
"precision": 0.9460730088495575,
|
| 1273 |
+
"recall": 0.9704964539007093,
|
| 1274 |
+
"f1": 0.9581291135695281
|
| 1275 |
+
},
|
| 1276 |
+
{
|
| 1277 |
+
"acc": 0.982,
|
| 1278 |
+
"precision": 0.9607173356105893,
|
| 1279 |
+
"recall": 0.9574468085106383,
|
| 1280 |
+
"f1": 0.959079283887468
|
| 1281 |
+
},
|
| 1282 |
+
{
|
| 1283 |
+
"acc": 0.9805625,
|
| 1284 |
+
"precision": 0.9649884259259259,
|
| 1285 |
+
"recall": 0.9460992907801419,
|
| 1286 |
+
"f1": 0.9554505085231342
|
| 1287 |
+
},
|
| 1288 |
+
{
|
| 1289 |
+
"acc": 0.98225,
|
| 1290 |
+
"precision": 0.952274630198158,
|
| 1291 |
+
"recall": 0.9679432624113475,
|
| 1292 |
+
"f1": 0.9600450196961171
|
| 1293 |
+
},
|
| 1294 |
+
{
|
| 1295 |
+
"acc": 0.98225,
|
| 1296 |
+
"precision": 0.9639278557114228,
|
| 1297 |
+
"recall": 0.955177304964539,
|
| 1298 |
+
"f1": 0.9595326303790253
|
| 1299 |
+
},
|
| 1300 |
+
{
|
| 1301 |
+
"acc": 0.982375,
|
| 1302 |
+
"precision": 0.9543289436817035,
|
| 1303 |
+
"recall": 0.9662411347517731,
|
| 1304 |
+
"f1": 0.9602480969833662
|
| 1305 |
+
},
|
| 1306 |
+
{
|
| 1307 |
+
"acc": 0.98375,
|
| 1308 |
+
"precision": 0.9543556916225995,
|
| 1309 |
+
"recall": 0.9727659574468085,
|
| 1310 |
+
"f1": 0.9634728856420341
|
| 1311 |
+
},
|
| 1312 |
+
{
|
| 1313 |
+
"acc": 0.98125,
|
| 1314 |
+
"precision": 0.9680696661828737,
|
| 1315 |
+
"recall": 0.9460992907801419,
|
| 1316 |
+
"f1": 0.9569583931133429
|
| 1317 |
+
},
|
| 1318 |
+
{
|
| 1319 |
+
"acc": 0.983,
|
| 1320 |
+
"precision": 0.965379113018598,
|
| 1321 |
+
"recall": 0.9571631205673758,
|
| 1322 |
+
"f1": 0.9612535612535612
|
| 1323 |
+
},
|
| 1324 |
+
{
|
| 1325 |
+
"acc": 0.984375,
|
| 1326 |
+
"precision": 0.9593267882187938,
|
| 1327 |
+
"recall": 0.9702127659574468,
|
| 1328 |
+
"f1": 0.9647390691114245
|
| 1329 |
+
},
|
| 1330 |
+
{
|
| 1331 |
+
"acc": 0.9836875,
|
| 1332 |
+
"precision": 0.9633730834752982,
|
| 1333 |
+
"recall": 0.9625531914893617,
|
| 1334 |
+
"f1": 0.9629629629629629
|
| 1335 |
+
},
|
| 1336 |
+
{
|
| 1337 |
+
"acc": 0.98425,
|
| 1338 |
+
"precision": 0.9507022858716607,
|
| 1339 |
+
"recall": 0.979290780141844,
|
| 1340 |
+
"f1": 0.9647847959754053
|
| 1341 |
+
},
|
| 1342 |
+
{
|
| 1343 |
+
"acc": 0.983,
|
| 1344 |
+
"precision": 0.9651129539605376,
|
| 1345 |
+
"recall": 0.9574468085106383,
|
| 1346 |
+
"f1": 0.9612645969809172
|
| 1347 |
+
},
|
| 1348 |
+
{
|
| 1349 |
+
"acc": 0.9840625,
|
| 1350 |
+
"precision": 0.9587542087542088,
|
| 1351 |
+
"recall": 0.9693617021276596,
|
| 1352 |
+
"f1": 0.9640287769784174
|
| 1353 |
+
},
|
| 1354 |
+
{
|
| 1355 |
+
"acc": 0.9835625,
|
| 1356 |
+
"precision": 0.966,
|
| 1357 |
+
"recall": 0.9591489361702128,
|
| 1358 |
+
"f1": 0.9625622775800712
|
| 1359 |
+
},
|
| 1360 |
+
{
|
| 1361 |
+
"acc": 0.9839375,
|
| 1362 |
+
"precision": 0.9600225225225225,
|
| 1363 |
+
"recall": 0.9673758865248226,
|
| 1364 |
+
"f1": 0.963685177335029
|
| 1365 |
+
},
|
| 1366 |
+
{
|
| 1367 |
+
"acc": 0.98425,
|
| 1368 |
+
"precision": 0.9405114401076716,
|
| 1369 |
+
"recall": 0.9912056737588653,
|
| 1370 |
+
"f1": 0.9651933701657459
|
| 1371 |
+
},
|
| 1372 |
+
{
|
| 1373 |
+
"acc": 0.9814375,
|
| 1374 |
+
"precision": 0.9686411149825784,
|
| 1375 |
+
"recall": 0.9463829787234043,
|
| 1376 |
+
"f1": 0.9573826947912182
|
| 1377 |
+
},
|
| 1378 |
+
{
|
| 1379 |
+
"acc": 0.9831875,
|
| 1380 |
+
"precision": 0.955512031337437,
|
| 1381 |
+
"recall": 0.9687943262411347,
|
| 1382 |
+
"f1": 0.9621073390618397
|
| 1383 |
+
},
|
| 1384 |
+
{
|
| 1385 |
+
"acc": 0.9836875,
|
| 1386 |
+
"precision": 0.9515771997786386,
|
| 1387 |
+
"recall": 0.9756028368794326,
|
| 1388 |
+
"f1": 0.9634402577391792
|
| 1389 |
+
},
|
| 1390 |
+
{
|
| 1391 |
+
"acc": 0.9860625,
|
| 1392 |
+
"precision": 0.9565818584070797,
|
| 1393 |
+
"recall": 0.9812765957446808,
|
| 1394 |
+
"f1": 0.9687718806889791
|
| 1395 |
+
},
|
| 1396 |
+
{
|
| 1397 |
+
"acc": 0.9835625,
|
| 1398 |
+
"precision": 0.9505524861878453,
|
| 1399 |
+
"recall": 0.9761702127659575,
|
| 1400 |
+
"f1": 0.9631910426871939
|
| 1401 |
+
},
|
| 1402 |
+
{
|
| 1403 |
+
"acc": 0.9853125,
|
| 1404 |
+
"precision": 0.9472539423599783,
|
| 1405 |
+
"recall": 0.9883687943262411,
|
| 1406 |
+
"f1": 0.9673747049840344
|
| 1407 |
+
},
|
| 1408 |
+
{
|
| 1409 |
+
"acc": 0.9860625,
|
| 1410 |
+
"precision": 0.9479110146500271,
|
| 1411 |
+
"recall": 0.9912056737588653,
|
| 1412 |
+
"f1": 0.9690750242684788
|
| 1413 |
+
},
|
| 1414 |
+
{
|
| 1415 |
+
"acc": 0.982875,
|
| 1416 |
+
"precision": 0.9645613032294942,
|
| 1417 |
+
"recall": 0.9574468085106383,
|
| 1418 |
+
"f1": 0.960990888382688
|
| 1419 |
+
},
|
| 1420 |
+
{
|
| 1421 |
+
"acc": 0.9843125,
|
| 1422 |
+
"precision": 0.9606077658975802,
|
| 1423 |
+
"recall": 0.9685106382978723,
|
| 1424 |
+
"f1": 0.9645430145500776
|
| 1425 |
+
},
|
| 1426 |
+
{
|
| 1427 |
+
"acc": 0.9840625,
|
| 1428 |
+
"precision": 0.9501651982378855,
|
| 1429 |
+
"recall": 0.9790070921985815,
|
| 1430 |
+
"f1": 0.9643705463182898
|
| 1431 |
+
},
|
| 1432 |
+
{
|
| 1433 |
+
"acc": 0.983375,
|
| 1434 |
+
"precision": 0.9568264648163723,
|
| 1435 |
+
"recall": 0.9682269503546099,
|
| 1436 |
+
"f1": 0.9624929498025946
|
| 1437 |
+
},
|
| 1438 |
+
{
|
| 1439 |
+
"acc": 0.98375,
|
| 1440 |
+
"precision": 0.9505934308584046,
|
| 1441 |
+
"recall": 0.9770212765957447,
|
| 1442 |
+
"f1": 0.9636261891438165
|
| 1443 |
+
},
|
| 1444 |
+
{
|
| 1445 |
+
"acc": 0.9845,
|
| 1446 |
+
"precision": 0.9555184876285794,
|
| 1447 |
+
"recall": 0.9750354609929078,
|
| 1448 |
+
"f1": 0.9651783206964335
|
| 1449 |
+
},
|
| 1450 |
+
{
|
| 1451 |
+
"acc": 0.9830625,
|
| 1452 |
+
"precision": 0.9557422969187676,
|
| 1453 |
+
"recall": 0.9679432624113475,
|
| 1454 |
+
"f1": 0.9618040873854828
|
| 1455 |
+
},
|
| 1456 |
+
{
|
| 1457 |
+
"acc": 0.983375,
|
| 1458 |
+
"precision": 0.9555493430248811,
|
| 1459 |
+
"recall": 0.969645390070922,
|
| 1460 |
+
"f1": 0.9625457617572516
|
| 1461 |
+
},
|
| 1462 |
+
{
|
| 1463 |
+
"acc": 0.984,
|
| 1464 |
+
"precision": 0.9511454595638973,
|
| 1465 |
+
"recall": 0.9775886524822694,
|
| 1466 |
+
"f1": 0.9641857862339116
|
| 1467 |
+
},
|
| 1468 |
+
{
|
| 1469 |
+
"acc": 0.9845625,
|
| 1470 |
+
"precision": 0.9611705120990434,
|
| 1471 |
+
"recall": 0.9690780141843972,
|
| 1472 |
+
"f1": 0.9651080661110327
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"acc": 0.984625,
|
| 1476 |
+
"precision": 0.9565580618212197,
|
| 1477 |
+
"recall": 0.9744680851063829,
|
| 1478 |
+
"f1": 0.9654300168634065
|
| 1479 |
+
},
|
| 1480 |
+
{
|
| 1481 |
+
"acc": 0.9846875,
|
| 1482 |
+
"precision": 0.9563160823594881,
|
| 1483 |
+
"recall": 0.9750354609929078,
|
| 1484 |
+
"f1": 0.9655850540806294
|
| 1485 |
+
},
|
| 1486 |
+
{
|
| 1487 |
+
"acc": 0.9856875,
|
| 1488 |
+
"precision": 0.9461288576069301,
|
| 1489 |
+
"recall": 0.9914893617021276,
|
| 1490 |
+
"f1": 0.9682781548690954
|
| 1491 |
+
},
|
| 1492 |
+
{
|
| 1493 |
+
"acc": 0.9841875,
|
| 1494 |
+
"precision": 0.9631936579841449,
|
| 1495 |
+
"recall": 0.9651063829787234,
|
| 1496 |
+
"f1": 0.9641490718435596
|
| 1497 |
+
},
|
| 1498 |
+
{
|
| 1499 |
+
"acc": 0.98475,
|
| 1500 |
+
"precision": 0.9560745065332221,
|
| 1501 |
+
"recall": 0.9756028368794326,
|
| 1502 |
+
"f1": 0.9657399606852007
|
| 1503 |
+
},
|
| 1504 |
+
{
|
| 1505 |
+
"acc": 0.9836875,
|
| 1506 |
+
"precision": 0.9558659217877095,
|
| 1507 |
+
"recall": 0.9707801418439717,
|
| 1508 |
+
"f1": 0.963265306122449
|
| 1509 |
+
},
|
| 1510 |
+
{
|
| 1511 |
+
"acc": 0.9854375,
|
| 1512 |
+
"precision": 0.9497267759562842,
|
| 1513 |
+
"recall": 0.9860992907801418,
|
| 1514 |
+
"f1": 0.967571329157968
|
| 1515 |
+
},
|
| 1516 |
+
{
|
| 1517 |
+
"acc": 0.9844375,
|
| 1518 |
+
"precision": 0.9502473886750962,
|
| 1519 |
+
"recall": 0.9807092198581561,
|
| 1520 |
+
"f1": 0.9652380287588997
|
| 1521 |
+
},
|
| 1522 |
+
{
|
| 1523 |
+
"acc": 0.9844375,
|
| 1524 |
+
"precision": 0.9601123595505618,
|
| 1525 |
+
"recall": 0.969645390070922,
|
| 1526 |
+
"f1": 0.9648553281580804
|
| 1527 |
+
},
|
| 1528 |
+
{
|
| 1529 |
+
"acc": 0.98475,
|
| 1530 |
+
"precision": 0.957345971563981,
|
| 1531 |
+
"recall": 0.9741843971631206,
|
| 1532 |
+
"f1": 0.9656917885264341
|
| 1533 |
+
},
|
| 1534 |
+
{
|
| 1535 |
+
"acc": 0.983625,
|
| 1536 |
+
"precision": 0.9543302701197438,
|
| 1537 |
+
"recall": 0.9721985815602837,
|
| 1538 |
+
"f1": 0.9631815626756605
|
| 1539 |
+
},
|
| 1540 |
+
{
|
| 1541 |
+
"acc": 0.9839375,
|
| 1542 |
+
"precision": 0.9526315789473684,
|
| 1543 |
+
"recall": 0.9756028368794326,
|
| 1544 |
+
"f1": 0.9639803784162578
|
| 1545 |
+
},
|
| 1546 |
+
{
|
| 1547 |
+
"acc": 0.9833125,
|
| 1548 |
+
"precision": 0.9509966777408638,
|
| 1549 |
+
"recall": 0.9744680851063829,
|
| 1550 |
+
"f1": 0.962589323245061
|
| 1551 |
+
},
|
| 1552 |
+
{
|
| 1553 |
+
"acc": 0.98425,
|
| 1554 |
+
"precision": 0.9499587572174869,
|
| 1555 |
+
"recall": 0.9801418439716312,
|
| 1556 |
+
"f1": 0.9648142976822116
|
| 1557 |
+
},
|
| 1558 |
+
{
|
| 1559 |
+
"acc": 0.984375,
|
| 1560 |
+
"precision": 0.9590692458648724,
|
| 1561 |
+
"recall": 0.9704964539007093,
|
| 1562 |
+
"f1": 0.9647490129723633
|
| 1563 |
+
},
|
| 1564 |
+
{
|
| 1565 |
+
"acc": 0.9838125,
|
| 1566 |
+
"precision": 0.9528563505268997,
|
| 1567 |
+
"recall": 0.9747517730496454,
|
| 1568 |
+
"f1": 0.9636797083158043
|
| 1569 |
+
},
|
| 1570 |
+
{
|
| 1571 |
+
"acc": 0.9848125,
|
| 1572 |
+
"precision": 0.9553274139844617,
|
| 1573 |
+
"recall": 0.9767375886524823,
|
| 1574 |
+
"f1": 0.965913872913452
|
| 1575 |
+
},
|
| 1576 |
+
{
|
| 1577 |
+
"acc": 0.9836875,
|
| 1578 |
+
"precision": 0.9551031790295594,
|
| 1579 |
+
"recall": 0.9716312056737588,
|
| 1580 |
+
"f1": 0.963296301504711
|
| 1581 |
+
},
|
| 1582 |
+
{
|
| 1583 |
+
"acc": 0.9845,
|
| 1584 |
+
"precision": 0.9429575560962422,
|
| 1585 |
+
"recall": 0.9895035460992908,
|
| 1586 |
+
"f1": 0.965669988925803
|
| 1587 |
+
},
|
| 1588 |
+
{
|
| 1589 |
+
"acc": 0.982375,
|
| 1590 |
+
"precision": 0.9589583923011605,
|
| 1591 |
+
"recall": 0.9611347517730496,
|
| 1592 |
+
"f1": 0.9600453386228394
|
| 1593 |
+
},
|
| 1594 |
+
{
|
| 1595 |
+
"acc": 0.984375,
|
| 1596 |
+
"precision": 0.962439988703756,
|
| 1597 |
+
"recall": 0.9668085106382979,
|
| 1598 |
+
"f1": 0.9646193037078971
|
| 1599 |
+
},
|
| 1600 |
+
{
|
| 1601 |
+
"acc": 0.985625,
|
| 1602 |
+
"precision": 0.9517411571154374,
|
| 1603 |
+
"recall": 0.9846808510638297,
|
| 1604 |
+
"f1": 0.967930842163971
|
| 1605 |
+
},
|
| 1606 |
+
{
|
| 1607 |
+
"acc": 0.98325,
|
| 1608 |
+
"precision": 0.9596387242449901,
|
| 1609 |
+
"recall": 0.9645390070921985,
|
| 1610 |
+
"f1": 0.9620826259196378
|
| 1611 |
+
},
|
| 1612 |
+
{
|
| 1613 |
+
"acc": 0.984,
|
| 1614 |
+
"precision": 0.9647426784191072,
|
| 1615 |
+
"recall": 0.9625531914893617,
|
| 1616 |
+
"f1": 0.9636466912808862
|
| 1617 |
+
},
|
| 1618 |
+
{
|
| 1619 |
+
"acc": 0.984875,
|
| 1620 |
+
"precision": 0.9586476669460743,
|
| 1621 |
+
"recall": 0.9733333333333334,
|
| 1622 |
+
"f1": 0.9659346846846848
|
| 1623 |
+
},
|
| 1624 |
+
{
|
| 1625 |
+
"acc": 0.9850625,
|
| 1626 |
+
"precision": 0.9581706636921361,
|
| 1627 |
+
"recall": 0.9747517730496454,
|
| 1628 |
+
"f1": 0.9663900998453102
|
| 1629 |
+
},
|
| 1630 |
+
{
|
| 1631 |
+
"acc": 0.9836875,
|
| 1632 |
+
"precision": 0.9493392070484582,
|
| 1633 |
+
"recall": 0.9781560283687943,
|
| 1634 |
+
"f1": 0.9635322062316614
|
| 1635 |
+
},
|
| 1636 |
+
{
|
| 1637 |
+
"acc": 0.983125,
|
| 1638 |
+
"precision": 0.9575484959235311,
|
| 1639 |
+
"recall": 0.9662411347517731,
|
| 1640 |
+
"f1": 0.9618751765038125
|
| 1641 |
+
},
|
| 1642 |
+
{
|
| 1643 |
+
"acc": 0.98425,
|
| 1644 |
+
"precision": 0.9492176777381279,
|
| 1645 |
+
"recall": 0.9809929078014185,
|
| 1646 |
+
"f1": 0.9648437500000001
|
| 1647 |
+
},
|
| 1648 |
+
{
|
| 1649 |
+
"acc": 0.9826875,
|
| 1650 |
+
"precision": 0.9672036823935558,
|
| 1651 |
+
"recall": 0.953758865248227,
|
| 1652 |
+
"f1": 0.960434223682331
|
| 1653 |
+
},
|
| 1654 |
+
{
|
| 1655 |
+
"acc": 0.9845,
|
| 1656 |
+
"precision": 0.961679346294731,
|
| 1657 |
+
"recall": 0.9682269503546099,
|
| 1658 |
+
"f1": 0.964942041277919
|
| 1659 |
+
},
|
| 1660 |
+
{
|
| 1661 |
+
"acc": 0.9845,
|
| 1662 |
+
"precision": 0.960900140646976,
|
| 1663 |
+
"recall": 0.9690780141843972,
|
| 1664 |
+
"f1": 0.9649717514124294
|
| 1665 |
+
},
|
| 1666 |
+
{
|
| 1667 |
+
"acc": 0.984125,
|
| 1668 |
+
"precision": 0.9623975120158327,
|
| 1669 |
+
"recall": 0.9656737588652482,
|
| 1670 |
+
"f1": 0.9640328518833192
|
| 1671 |
+
},
|
| 1672 |
+
{
|
| 1673 |
+
"acc": 0.984875,
|
| 1674 |
+
"precision": 0.9571150097465887,
|
| 1675 |
+
"recall": 0.9750354609929078,
|
| 1676 |
+
"f1": 0.9659921304103429
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"acc": 0.984625,
|
| 1680 |
+
"precision": 0.9598877980364656,
|
| 1681 |
+
"recall": 0.9707801418439717,
|
| 1682 |
+
"f1": 0.9653032440056418
|
| 1683 |
+
},
|
| 1684 |
+
{
|
| 1685 |
+
"acc": 0.98375,
|
| 1686 |
+
"precision": 0.9546087440824282,
|
| 1687 |
+
"recall": 0.9724822695035461,
|
| 1688 |
+
"f1": 0.9634626194491286
|
| 1689 |
+
},
|
| 1690 |
+
{
|
| 1691 |
+
"acc": 0.984125,
|
| 1692 |
+
"precision": 0.9501789154968345,
|
| 1693 |
+
"recall": 0.979290780141844,
|
| 1694 |
+
"f1": 0.9645152277172394
|
| 1695 |
+
},
|
| 1696 |
+
{
|
| 1697 |
+
"acc": 0.9849375,
|
| 1698 |
+
"precision": 0.9607182940516273,
|
| 1699 |
+
"recall": 0.9713475177304964,
|
| 1700 |
+
"f1": 0.9660036676541119
|
| 1701 |
+
},
|
| 1702 |
+
{
|
| 1703 |
+
"acc": 0.984875,
|
| 1704 |
+
"precision": 0.956606397774687,
|
| 1705 |
+
"recall": 0.9756028368794326,
|
| 1706 |
+
"f1": 0.9660112359550562
|
| 1707 |
+
},
|
| 1708 |
+
{
|
| 1709 |
+
"acc": 0.984625,
|
| 1710 |
+
"precision": 0.9570671870643992,
|
| 1711 |
+
"recall": 0.9739007092198582,
|
| 1712 |
+
"f1": 0.9654105736782902
|
| 1713 |
+
},
|
| 1714 |
+
{
|
| 1715 |
+
"acc": 0.9849375,
|
| 1716 |
+
"precision": 0.9584031267448353,
|
| 1717 |
+
"recall": 0.9739007092198582,
|
| 1718 |
+
"f1": 0.9660897706486562
|
| 1719 |
+
},
|
| 1720 |
+
{
|
| 1721 |
+
"acc": 0.98375,
|
| 1722 |
+
"precision": 0.9523413688002217,
|
| 1723 |
+
"recall": 0.9750354609929078,
|
| 1724 |
+
"f1": 0.9635548079618728
|
| 1725 |
+
},
|
| 1726 |
+
{
|
| 1727 |
+
"acc": 0.984,
|
| 1728 |
+
"precision": 0.9536497363308354,
|
| 1729 |
+
"recall": 0.9747517730496454,
|
| 1730 |
+
"f1": 0.9640852974186307
|
| 1731 |
+
},
|
| 1732 |
+
{
|
| 1733 |
+
"acc": 0.98375,
|
| 1734 |
+
"precision": 0.9505934308584046,
|
| 1735 |
+
"recall": 0.9770212765957447,
|
| 1736 |
+
"f1": 0.9636261891438165
|
| 1737 |
+
},
|
| 1738 |
+
{
|
| 1739 |
+
"acc": 0.984,
|
| 1740 |
+
"precision": 0.9533980582524272,
|
| 1741 |
+
"recall": 0.9750354609929078,
|
| 1742 |
+
"f1": 0.9640953716690043
|
| 1743 |
+
}
|
| 1744 |
+
]
|
| 1745 |
+
}
|
| 1746 |
+
},
|
| 1747 |
+
"config": {
|
| 1748 |
+
"n_train": 2000,
|
| 1749 |
+
"n_test": 400,
|
| 1750 |
+
"hidden_dim": 64,
|
| 1751 |
+
"epochs": 80,
|
| 1752 |
+
"lr": 0.002,
|
| 1753 |
+
"max_hops": 3
|
| 1754 |
+
},
|
| 1755 |
+
"elapsed_min": 21.402417866388955
|
| 1756 |
}
|
FINAL_SUBMIT/receipts/ablation_matrix.json
CHANGED
|
@@ -1,95 +1,95 @@
|
|
| 1 |
-
{
|
| 2 |
-
"framework": "leave-one-out reward ablation per RL guide \u00a77-8",
|
| 3 |
-
"n_episodes_per_trial": 100,
|
| 4 |
-
"baseline": {
|
| 5 |
-
"disabled": "none",
|
| 6 |
-
"mean_return": 0.6742,
|
| 7 |
-
"solve_rate": 0.27,
|
| 8 |
-
"n_episodes": 100
|
| 9 |
-
},
|
| 10 |
-
"ablations": [
|
| 11 |
-
{
|
| 12 |
-
"disabled": "green_credit",
|
| 13 |
-
"mean_return": 0.2152,
|
| 14 |
-
"solve_rate": 0.27,
|
| 15 |
-
"n_episodes": 100,
|
| 16 |
-
"delta_mean_return": -0.459,
|
| 17 |
-
"pct_change": -68.08
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"disabled": "yellow_credit",
|
| 21 |
-
"mean_return": 0.613,
|
| 22 |
-
"solve_rate": 0.27,
|
| 23 |
-
"n_episodes": 100,
|
| 24 |
-
"delta_mean_return": -0.0612,
|
| 25 |
-
"pct_change": -9.08
|
| 26 |
-
},
|
| 27 |
-
{
|
| 28 |
-
"disabled": "solve_bonus",
|
| 29 |
-
"mean_return": 0.4042,
|
| 30 |
-
"solve_rate": 0.27,
|
| 31 |
-
"n_episodes": 100,
|
| 32 |
-
"delta_mean_return": -0.27,
|
| 33 |
-
"pct_change": -40.05
|
| 34 |
-
},
|
| 35 |
-
{
|
| 36 |
-
"disabled": "guess_count_bonus",
|
| 37 |
-
"mean_return": 0.6442,
|
| 38 |
-
"solve_rate": 0.27,
|
| 39 |
-
"n_episodes": 100,
|
| 40 |
-
"delta_mean_return": -0.03,
|
| 41 |
-
"pct_change": -4.45
|
| 42 |
-
},
|
| 43 |
-
{
|
| 44 |
-
"disabled": "timeout_penalty",
|
| 45 |
-
"mean_return": 0.8202,
|
| 46 |
-
"solve_rate": 0.27,
|
| 47 |
-
"n_episodes": 100,
|
| 48 |
-
"delta_mean_return": 0.146,
|
| 49 |
-
"pct_change": 21.66
|
| 50 |
-
}
|
| 51 |
-
],
|
| 52 |
-
"ranked_by_impact": [
|
| 53 |
-
{
|
| 54 |
-
"disabled": "green_credit",
|
| 55 |
-
"mean_return": 0.2152,
|
| 56 |
-
"solve_rate": 0.27,
|
| 57 |
-
"n_episodes": 100,
|
| 58 |
-
"delta_mean_return": -0.459,
|
| 59 |
-
"pct_change": -68.08
|
| 60 |
-
},
|
| 61 |
-
{
|
| 62 |
-
"disabled": "solve_bonus",
|
| 63 |
-
"mean_return": 0.4042,
|
| 64 |
-
"solve_rate": 0.27,
|
| 65 |
-
"n_episodes": 100,
|
| 66 |
-
"delta_mean_return": -0.27,
|
| 67 |
-
"pct_change": -40.05
|
| 68 |
-
},
|
| 69 |
-
{
|
| 70 |
-
"disabled": "timeout_penalty",
|
| 71 |
-
"mean_return": 0.8202,
|
| 72 |
-
"solve_rate": 0.27,
|
| 73 |
-
"n_episodes": 100,
|
| 74 |
-
"delta_mean_return": 0.146,
|
| 75 |
-
"pct_change": 21.66
|
| 76 |
-
},
|
| 77 |
-
{
|
| 78 |
-
"disabled": "yellow_credit",
|
| 79 |
-
"mean_return": 0.613,
|
| 80 |
-
"solve_rate": 0.27,
|
| 81 |
-
"n_episodes": 100,
|
| 82 |
-
"delta_mean_return": -0.0612,
|
| 83 |
-
"pct_change": -9.08
|
| 84 |
-
},
|
| 85 |
-
{
|
| 86 |
-
"disabled": "guess_count_bonus",
|
| 87 |
-
"mean_return": 0.6442,
|
| 88 |
-
"solve_rate": 0.27,
|
| 89 |
-
"n_episodes": 100,
|
| 90 |
-
"delta_mean_return": -0.03,
|
| 91 |
-
"pct_change": -4.45
|
| 92 |
-
}
|
| 93 |
-
],
|
| 94 |
-
"insight": "components ranked by metric drop when removed reveal which reward signals are load-bearing"
|
| 95 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"framework": "leave-one-out reward ablation per RL guide \u00a77-8",
|
| 3 |
+
"n_episodes_per_trial": 100,
|
| 4 |
+
"baseline": {
|
| 5 |
+
"disabled": "none",
|
| 6 |
+
"mean_return": 0.6742,
|
| 7 |
+
"solve_rate": 0.27,
|
| 8 |
+
"n_episodes": 100
|
| 9 |
+
},
|
| 10 |
+
"ablations": [
|
| 11 |
+
{
|
| 12 |
+
"disabled": "green_credit",
|
| 13 |
+
"mean_return": 0.2152,
|
| 14 |
+
"solve_rate": 0.27,
|
| 15 |
+
"n_episodes": 100,
|
| 16 |
+
"delta_mean_return": -0.459,
|
| 17 |
+
"pct_change": -68.08
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"disabled": "yellow_credit",
|
| 21 |
+
"mean_return": 0.613,
|
| 22 |
+
"solve_rate": 0.27,
|
| 23 |
+
"n_episodes": 100,
|
| 24 |
+
"delta_mean_return": -0.0612,
|
| 25 |
+
"pct_change": -9.08
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"disabled": "solve_bonus",
|
| 29 |
+
"mean_return": 0.4042,
|
| 30 |
+
"solve_rate": 0.27,
|
| 31 |
+
"n_episodes": 100,
|
| 32 |
+
"delta_mean_return": -0.27,
|
| 33 |
+
"pct_change": -40.05
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"disabled": "guess_count_bonus",
|
| 37 |
+
"mean_return": 0.6442,
|
| 38 |
+
"solve_rate": 0.27,
|
| 39 |
+
"n_episodes": 100,
|
| 40 |
+
"delta_mean_return": -0.03,
|
| 41 |
+
"pct_change": -4.45
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"disabled": "timeout_penalty",
|
| 45 |
+
"mean_return": 0.8202,
|
| 46 |
+
"solve_rate": 0.27,
|
| 47 |
+
"n_episodes": 100,
|
| 48 |
+
"delta_mean_return": 0.146,
|
| 49 |
+
"pct_change": 21.66
|
| 50 |
+
}
|
| 51 |
+
],
|
| 52 |
+
"ranked_by_impact": [
|
| 53 |
+
{
|
| 54 |
+
"disabled": "green_credit",
|
| 55 |
+
"mean_return": 0.2152,
|
| 56 |
+
"solve_rate": 0.27,
|
| 57 |
+
"n_episodes": 100,
|
| 58 |
+
"delta_mean_return": -0.459,
|
| 59 |
+
"pct_change": -68.08
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"disabled": "solve_bonus",
|
| 63 |
+
"mean_return": 0.4042,
|
| 64 |
+
"solve_rate": 0.27,
|
| 65 |
+
"n_episodes": 100,
|
| 66 |
+
"delta_mean_return": -0.27,
|
| 67 |
+
"pct_change": -40.05
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"disabled": "timeout_penalty",
|
| 71 |
+
"mean_return": 0.8202,
|
| 72 |
+
"solve_rate": 0.27,
|
| 73 |
+
"n_episodes": 100,
|
| 74 |
+
"delta_mean_return": 0.146,
|
| 75 |
+
"pct_change": 21.66
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"disabled": "yellow_credit",
|
| 79 |
+
"mean_return": 0.613,
|
| 80 |
+
"solve_rate": 0.27,
|
| 81 |
+
"n_episodes": 100,
|
| 82 |
+
"delta_mean_return": -0.0612,
|
| 83 |
+
"pct_change": -9.08
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"disabled": "guess_count_bonus",
|
| 87 |
+
"mean_return": 0.6442,
|
| 88 |
+
"solve_rate": 0.27,
|
| 89 |
+
"n_episodes": 100,
|
| 90 |
+
"delta_mean_return": -0.03,
|
| 91 |
+
"pct_change": -4.45
|
| 92 |
+
}
|
| 93 |
+
],
|
| 94 |
+
"insight": "components ranked by metric drop when removed reveal which reward signals are load-bearing"
|
| 95 |
}
|
FINAL_SUBMIT/receipts/adversarial_20_attack_gauntlet.json
CHANGED
|
@@ -1,217 +1,217 @@
|
|
| 1 |
-
{
|
| 2 |
-
"started_at": 1777142712.0545185,
|
| 3 |
-
"framework": "RL guide \u00a738-44 + Skalse 2022 + Krakovna 2020",
|
| 4 |
-
"n_total": 20,
|
| 5 |
-
"results": [
|
| 6 |
-
{
|
| 7 |
-
"id": 1,
|
| 8 |
-
"name": "empty_string",
|
| 9 |
-
"expected_reason": "format_invalid",
|
| 10 |
-
"actual_reason": "format_invalid",
|
| 11 |
-
"reward_extracted": 0.0,
|
| 12 |
-
"blocked": true,
|
| 13 |
-
"is_legit_test": false,
|
| 14 |
-
"verdict": "BLOCKED"
|
| 15 |
-
},
|
| 16 |
-
{
|
| 17 |
-
"id": 2,
|
| 18 |
-
"name": "single_letter",
|
| 19 |
-
"expected_reason": "format_invalid",
|
| 20 |
-
"actual_reason": "format_invalid",
|
| 21 |
-
"reward_extracted": 0.0,
|
| 22 |
-
"blocked": true,
|
| 23 |
-
"is_legit_test": false,
|
| 24 |
-
"verdict": "BLOCKED"
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
"id": 3,
|
| 28 |
-
"name": "digits_only",
|
| 29 |
-
"expected_reason": "format_invalid",
|
| 30 |
-
"actual_reason": "format_invalid",
|
| 31 |
-
"reward_extracted": 0.0,
|
| 32 |
-
"blocked": true,
|
| 33 |
-
"is_legit_test": false,
|
| 34 |
-
"verdict": "BLOCKED"
|
| 35 |
-
},
|
| 36 |
-
{
|
| 37 |
-
"id": 4,
|
| 38 |
-
"name": "unicode_homoglyph",
|
| 39 |
-
"expected_reason": "format_invalid",
|
| 40 |
-
"actual_reason": "format_invalid",
|
| 41 |
-
"reward_extracted": 0.0,
|
| 42 |
-
"blocked": true,
|
| 43 |
-
"is_legit_test": false,
|
| 44 |
-
"verdict": "BLOCKED"
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"id": 5,
|
| 48 |
-
"name": "6char_word",
|
| 49 |
-
"expected_reason": "format_invalid",
|
| 50 |
-
"actual_reason": "format_invalid",
|
| 51 |
-
"reward_extracted": 0.0,
|
| 52 |
-
"blocked": true,
|
| 53 |
-
"is_legit_test": false,
|
| 54 |
-
"verdict": "BLOCKED"
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"id": 6,
|
| 58 |
-
"name": "4char_word",
|
| 59 |
-
"expected_reason": "format_invalid",
|
| 60 |
-
"actual_reason": "format_invalid",
|
| 61 |
-
"reward_extracted": 0.0,
|
| 62 |
-
"blocked": true,
|
| 63 |
-
"is_legit_test": false,
|
| 64 |
-
"verdict": "BLOCKED"
|
| 65 |
-
},
|
| 66 |
-
{
|
| 67 |
-
"id": 7,
|
| 68 |
-
"name": "uppercase_only",
|
| 69 |
-
"expected_reason": "accepted_after_normalize",
|
| 70 |
-
"actual_reason": "accepted_after_normalize",
|
| 71 |
-
"reward_extracted": 0.0,
|
| 72 |
-
"blocked": false,
|
| 73 |
-
"is_legit_test": true,
|
| 74 |
-
"verdict": "LEGIT_ACCEPTED"
|
| 75 |
-
},
|
| 76 |
-
{
|
| 77 |
-
"id": 8,
|
| 78 |
-
"name": "whitespace_padded",
|
| 79 |
-
"expected_reason": "format_invalid",
|
| 80 |
-
"actual_reason": "format_invalid",
|
| 81 |
-
"reward_extracted": 0.0,
|
| 82 |
-
"blocked": true,
|
| 83 |
-
"is_legit_test": false,
|
| 84 |
-
"verdict": "BLOCKED"
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"id": 9,
|
| 88 |
-
"name": "null_action",
|
| 89 |
-
"expected_reason": "format_invalid",
|
| 90 |
-
"actual_reason": "format_invalid",
|
| 91 |
-
"reward_extracted": 0.0,
|
| 92 |
-
"blocked": true,
|
| 93 |
-
"is_legit_test": false,
|
| 94 |
-
"verdict": "BLOCKED"
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"id": 10,
|
| 98 |
-
"name": "dict_lookalike_zzzzz",
|
| 99 |
-
"expected_reason": "non_dictionary",
|
| 100 |
-
"actual_reason": "non_dictionary",
|
| 101 |
-
"reward_extracted": 0.0,
|
| 102 |
-
"blocked": true,
|
| 103 |
-
"is_legit_test": false,
|
| 104 |
-
"verdict": "BLOCKED"
|
| 105 |
-
},
|
| 106 |
-
{
|
| 107 |
-
"id": 11,
|
| 108 |
-
"name": "repeat_same_guess_5x",
|
| 109 |
-
"expected_reason": "rolling_no_progress",
|
| 110 |
-
"actual_reason": "rolling_no_progress",
|
| 111 |
-
"reward_extracted": 0.07,
|
| 112 |
-
"blocked": true,
|
| 113 |
-
"is_legit_test": false,
|
| 114 |
-
"verdict": "BLOCKED"
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"id": 12,
|
| 118 |
-
"name": "solved_word_repeat",
|
| 119 |
-
"expected_reason": "episode_done",
|
| 120 |
-
"actual_reason": "episode_done",
|
| 121 |
-
"reward_extracted": 1.0,
|
| 122 |
-
"blocked": true,
|
| 123 |
-
"is_legit_test": false,
|
| 124 |
-
"verdict": "BLOCKED"
|
| 125 |
-
},
|
| 126 |
-
{
|
| 127 |
-
"id": 13,
|
| 128 |
-
"name": "unicode_zero_width",
|
| 129 |
-
"expected_reason": "format_invalid",
|
| 130 |
-
"actual_reason": "format_invalid",
|
| 131 |
-
"reward_extracted": 0.0,
|
| 132 |
-
"blocked": true,
|
| 133 |
-
"is_legit_test": false,
|
| 134 |
-
"verdict": "BLOCKED"
|
| 135 |
-
},
|
| 136 |
-
{
|
| 137 |
-
"id": 14,
|
| 138 |
-
"name": "sql_injection",
|
| 139 |
-
"expected_reason": "format_invalid",
|
| 140 |
-
"actual_reason": "format_invalid",
|
| 141 |
-
"reward_extracted": 0.0,
|
| 142 |
-
"blocked": true,
|
| 143 |
-
"is_legit_test": false,
|
| 144 |
-
"verdict": "BLOCKED"
|
| 145 |
-
},
|
| 146 |
-
{
|
| 147 |
-
"id": 15,
|
| 148 |
-
"name": "path_traversal",
|
| 149 |
-
"expected_reason": "format_invalid",
|
| 150 |
-
"actual_reason": "format_invalid",
|
| 151 |
-
"reward_extracted": 0.0,
|
| 152 |
-
"blocked": true,
|
| 153 |
-
"is_legit_test": false,
|
| 154 |
-
"verdict": "BLOCKED"
|
| 155 |
-
},
|
| 156 |
-
{
|
| 157 |
-
"id": 16,
|
| 158 |
-
"name": "extremely_long_string",
|
| 159 |
-
"expected_reason": "format_invalid",
|
| 160 |
-
"actual_reason": "format_invalid",
|
| 161 |
-
"reward_extracted": 0.0,
|
| 162 |
-
"blocked": true,
|
| 163 |
-
"is_legit_test": false,
|
| 164 |
-
"verdict": "BLOCKED"
|
| 165 |
-
},
|
| 166 |
-
{
|
| 167 |
-
"id": 17,
|
| 168 |
-
"name": "json_object_payload",
|
| 169 |
-
"expected_reason": "format_invalid",
|
| 170 |
-
"actual_reason": "format_invalid",
|
| 171 |
-
"reward_extracted": 0.0,
|
| 172 |
-
"blocked": true,
|
| 173 |
-
"is_legit_test": false,
|
| 174 |
-
"verdict": "BLOCKED"
|
| 175 |
-
},
|
| 176 |
-
{
|
| 177 |
-
"id": 18,
|
| 178 |
-
"name": "negative_action_index",
|
| 179 |
-
"expected_reason": "out_of_bounds",
|
| 180 |
-
"actual_reason": "format_invalid",
|
| 181 |
-
"reward_extracted": 0.0,
|
| 182 |
-
"blocked": true,
|
| 183 |
-
"is_legit_test": false,
|
| 184 |
-
"verdict": "BLOCKED"
|
| 185 |
-
},
|
| 186 |
-
{
|
| 187 |
-
"id": 19,
|
| 188 |
-
"name": "sleep_inside_action",
|
| 189 |
-
"expected_reason": "format_invalid",
|
| 190 |
-
"actual_reason": "format_invalid",
|
| 191 |
-
"reward_extracted": 0.0,
|
| 192 |
-
"blocked": true,
|
| 193 |
-
"is_legit_test": false,
|
| 194 |
-
"verdict": "BLOCKED"
|
| 195 |
-
},
|
| 196 |
-
{
|
| 197 |
-
"id": 20,
|
| 198 |
-
"name": "base64_encoded",
|
| 199 |
-
"expected_reason": "format_invalid",
|
| 200 |
-
"actual_reason": "format_invalid",
|
| 201 |
-
"reward_extracted": 0.0,
|
| 202 |
-
"blocked": true,
|
| 203 |
-
"is_legit_test": false,
|
| 204 |
-
"verdict": "BLOCKED"
|
| 205 |
-
}
|
| 206 |
-
],
|
| 207 |
-
"summary": {
|
| 208 |
-
"n_total_tests": 20,
|
| 209 |
-
"n_attacks": 19,
|
| 210 |
-
"n_blocked": 19,
|
| 211 |
-
"block_rate_pct": 100.0,
|
| 212 |
-
"n_legit": 1,
|
| 213 |
-
"n_legit_accepted": 1,
|
| 214 |
-
"false_positive_rate_pct": 0.0,
|
| 215 |
-
"verdict": "PASS"
|
| 216 |
-
}
|
| 217 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"started_at": 1777142712.0545185,
|
| 3 |
+
"framework": "RL guide \u00a738-44 + Skalse 2022 + Krakovna 2020",
|
| 4 |
+
"n_total": 20,
|
| 5 |
+
"results": [
|
| 6 |
+
{
|
| 7 |
+
"id": 1,
|
| 8 |
+
"name": "empty_string",
|
| 9 |
+
"expected_reason": "format_invalid",
|
| 10 |
+
"actual_reason": "format_invalid",
|
| 11 |
+
"reward_extracted": 0.0,
|
| 12 |
+
"blocked": true,
|
| 13 |
+
"is_legit_test": false,
|
| 14 |
+
"verdict": "BLOCKED"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"id": 2,
|
| 18 |
+
"name": "single_letter",
|
| 19 |
+
"expected_reason": "format_invalid",
|
| 20 |
+
"actual_reason": "format_invalid",
|
| 21 |
+
"reward_extracted": 0.0,
|
| 22 |
+
"blocked": true,
|
| 23 |
+
"is_legit_test": false,
|
| 24 |
+
"verdict": "BLOCKED"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"id": 3,
|
| 28 |
+
"name": "digits_only",
|
| 29 |
+
"expected_reason": "format_invalid",
|
| 30 |
+
"actual_reason": "format_invalid",
|
| 31 |
+
"reward_extracted": 0.0,
|
| 32 |
+
"blocked": true,
|
| 33 |
+
"is_legit_test": false,
|
| 34 |
+
"verdict": "BLOCKED"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"id": 4,
|
| 38 |
+
"name": "unicode_homoglyph",
|
| 39 |
+
"expected_reason": "format_invalid",
|
| 40 |
+
"actual_reason": "format_invalid",
|
| 41 |
+
"reward_extracted": 0.0,
|
| 42 |
+
"blocked": true,
|
| 43 |
+
"is_legit_test": false,
|
| 44 |
+
"verdict": "BLOCKED"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"id": 5,
|
| 48 |
+
"name": "6char_word",
|
| 49 |
+
"expected_reason": "format_invalid",
|
| 50 |
+
"actual_reason": "format_invalid",
|
| 51 |
+
"reward_extracted": 0.0,
|
| 52 |
+
"blocked": true,
|
| 53 |
+
"is_legit_test": false,
|
| 54 |
+
"verdict": "BLOCKED"
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"id": 6,
|
| 58 |
+
"name": "4char_word",
|
| 59 |
+
"expected_reason": "format_invalid",
|
| 60 |
+
"actual_reason": "format_invalid",
|
| 61 |
+
"reward_extracted": 0.0,
|
| 62 |
+
"blocked": true,
|
| 63 |
+
"is_legit_test": false,
|
| 64 |
+
"verdict": "BLOCKED"
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"id": 7,
|
| 68 |
+
"name": "uppercase_only",
|
| 69 |
+
"expected_reason": "accepted_after_normalize",
|
| 70 |
+
"actual_reason": "accepted_after_normalize",
|
| 71 |
+
"reward_extracted": 0.0,
|
| 72 |
+
"blocked": false,
|
| 73 |
+
"is_legit_test": true,
|
| 74 |
+
"verdict": "LEGIT_ACCEPTED"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"id": 8,
|
| 78 |
+
"name": "whitespace_padded",
|
| 79 |
+
"expected_reason": "format_invalid",
|
| 80 |
+
"actual_reason": "format_invalid",
|
| 81 |
+
"reward_extracted": 0.0,
|
| 82 |
+
"blocked": true,
|
| 83 |
+
"is_legit_test": false,
|
| 84 |
+
"verdict": "BLOCKED"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"id": 9,
|
| 88 |
+
"name": "null_action",
|
| 89 |
+
"expected_reason": "format_invalid",
|
| 90 |
+
"actual_reason": "format_invalid",
|
| 91 |
+
"reward_extracted": 0.0,
|
| 92 |
+
"blocked": true,
|
| 93 |
+
"is_legit_test": false,
|
| 94 |
+
"verdict": "BLOCKED"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"id": 10,
|
| 98 |
+
"name": "dict_lookalike_zzzzz",
|
| 99 |
+
"expected_reason": "non_dictionary",
|
| 100 |
+
"actual_reason": "non_dictionary",
|
| 101 |
+
"reward_extracted": 0.0,
|
| 102 |
+
"blocked": true,
|
| 103 |
+
"is_legit_test": false,
|
| 104 |
+
"verdict": "BLOCKED"
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"id": 11,
|
| 108 |
+
"name": "repeat_same_guess_5x",
|
| 109 |
+
"expected_reason": "rolling_no_progress",
|
| 110 |
+
"actual_reason": "rolling_no_progress",
|
| 111 |
+
"reward_extracted": 0.07,
|
| 112 |
+
"blocked": true,
|
| 113 |
+
"is_legit_test": false,
|
| 114 |
+
"verdict": "BLOCKED"
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"id": 12,
|
| 118 |
+
"name": "solved_word_repeat",
|
| 119 |
+
"expected_reason": "episode_done",
|
| 120 |
+
"actual_reason": "episode_done",
|
| 121 |
+
"reward_extracted": 1.0,
|
| 122 |
+
"blocked": true,
|
| 123 |
+
"is_legit_test": false,
|
| 124 |
+
"verdict": "BLOCKED"
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"id": 13,
|
| 128 |
+
"name": "unicode_zero_width",
|
| 129 |
+
"expected_reason": "format_invalid",
|
| 130 |
+
"actual_reason": "format_invalid",
|
| 131 |
+
"reward_extracted": 0.0,
|
| 132 |
+
"blocked": true,
|
| 133 |
+
"is_legit_test": false,
|
| 134 |
+
"verdict": "BLOCKED"
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"id": 14,
|
| 138 |
+
"name": "sql_injection",
|
| 139 |
+
"expected_reason": "format_invalid",
|
| 140 |
+
"actual_reason": "format_invalid",
|
| 141 |
+
"reward_extracted": 0.0,
|
| 142 |
+
"blocked": true,
|
| 143 |
+
"is_legit_test": false,
|
| 144 |
+
"verdict": "BLOCKED"
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"id": 15,
|
| 148 |
+
"name": "path_traversal",
|
| 149 |
+
"expected_reason": "format_invalid",
|
| 150 |
+
"actual_reason": "format_invalid",
|
| 151 |
+
"reward_extracted": 0.0,
|
| 152 |
+
"blocked": true,
|
| 153 |
+
"is_legit_test": false,
|
| 154 |
+
"verdict": "BLOCKED"
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"id": 16,
|
| 158 |
+
"name": "extremely_long_string",
|
| 159 |
+
"expected_reason": "format_invalid",
|
| 160 |
+
"actual_reason": "format_invalid",
|
| 161 |
+
"reward_extracted": 0.0,
|
| 162 |
+
"blocked": true,
|
| 163 |
+
"is_legit_test": false,
|
| 164 |
+
"verdict": "BLOCKED"
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"id": 17,
|
| 168 |
+
"name": "json_object_payload",
|
| 169 |
+
"expected_reason": "format_invalid",
|
| 170 |
+
"actual_reason": "format_invalid",
|
| 171 |
+
"reward_extracted": 0.0,
|
| 172 |
+
"blocked": true,
|
| 173 |
+
"is_legit_test": false,
|
| 174 |
+
"verdict": "BLOCKED"
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"id": 18,
|
| 178 |
+
"name": "negative_action_index",
|
| 179 |
+
"expected_reason": "out_of_bounds",
|
| 180 |
+
"actual_reason": "format_invalid",
|
| 181 |
+
"reward_extracted": 0.0,
|
| 182 |
+
"blocked": true,
|
| 183 |
+
"is_legit_test": false,
|
| 184 |
+
"verdict": "BLOCKED"
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"id": 19,
|
| 188 |
+
"name": "sleep_inside_action",
|
| 189 |
+
"expected_reason": "format_invalid",
|
| 190 |
+
"actual_reason": "format_invalid",
|
| 191 |
+
"reward_extracted": 0.0,
|
| 192 |
+
"blocked": true,
|
| 193 |
+
"is_legit_test": false,
|
| 194 |
+
"verdict": "BLOCKED"
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"id": 20,
|
| 198 |
+
"name": "base64_encoded",
|
| 199 |
+
"expected_reason": "format_invalid",
|
| 200 |
+
"actual_reason": "format_invalid",
|
| 201 |
+
"reward_extracted": 0.0,
|
| 202 |
+
"blocked": true,
|
| 203 |
+
"is_legit_test": false,
|
| 204 |
+
"verdict": "BLOCKED"
|
| 205 |
+
}
|
| 206 |
+
],
|
| 207 |
+
"summary": {
|
| 208 |
+
"n_total_tests": 20,
|
| 209 |
+
"n_attacks": 19,
|
| 210 |
+
"n_blocked": 19,
|
| 211 |
+
"block_rate_pct": 100.0,
|
| 212 |
+
"n_legit": 1,
|
| 213 |
+
"n_legit_accepted": 1,
|
| 214 |
+
"false_positive_rate_pct": 0.0,
|
| 215 |
+
"verdict": "PASS"
|
| 216 |
+
}
|
| 217 |
}
|
FINAL_SUBMIT/receipts/adversarial_reward_audit.json
CHANGED
|
@@ -1,132 +1,132 @@
|
|
| 1 |
-
{
|
| 2 |
-
"scenario_id": "2011_Tōhoku_earthquake_and_tsunami",
|
| 3 |
-
"ground_truth": "CRITICAL",
|
| 4 |
-
"reward_formula": "0.7 * match + 0.2 * format + 0.1 * length",
|
| 5 |
-
"n_attacks": 6,
|
| 6 |
-
"n_rejected": 6,
|
| 7 |
-
"attacks": [
|
| 8 |
-
{
|
| 9 |
-
"attack": "A1_empty_string",
|
| 10 |
-
"description": "Degenerate empty output — no info at all.",
|
| 11 |
-
"reward": 0.0,
|
| 12 |
-
"breakdown": {
|
| 13 |
-
"match": 0.0,
|
| 14 |
-
"format": 0.0,
|
| 15 |
-
"length": 0.0,
|
| 16 |
-
"weights": [
|
| 17 |
-
0.7,
|
| 18 |
-
0.2,
|
| 19 |
-
0.1
|
| 20 |
-
],
|
| 21 |
-
"n_tokens": 1
|
| 22 |
-
},
|
| 23 |
-
"max_expected": 0.01,
|
| 24 |
-
"rejected": true
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
"attack": "A2_risk_only_short_circuit",
|
| 28 |
-
"description": "Pure short-circuit: output the ground-truth label with no structure. Length-guard should reject.",
|
| 29 |
-
"reward": 0.7,
|
| 30 |
-
"breakdown": {
|
| 31 |
-
"match": 1.0,
|
| 32 |
-
"format": 0.0,
|
| 33 |
-
"length": 0.0,
|
| 34 |
-
"weights": [
|
| 35 |
-
0.7,
|
| 36 |
-
0.2,
|
| 37 |
-
0.1
|
| 38 |
-
],
|
| 39 |
-
"n_tokens": 1
|
| 40 |
-
},
|
| 41 |
-
"max_expected": 0.71,
|
| 42 |
-
"rejected": true
|
| 43 |
-
},
|
| 44 |
-
{
|
| 45 |
-
"attack": "A3_long_spam_no_json",
|
| 46 |
-
"description": "Pad with junk to beat length-guard while omitting structured output. Format-guard should reject.",
|
| 47 |
-
"reward": 0.8,
|
| 48 |
-
"breakdown": {
|
| 49 |
-
"match": 1.0,
|
| 50 |
-
"format": 0.0,
|
| 51 |
-
"length": 1.0,
|
| 52 |
-
"weights": [
|
| 53 |
-
0.7,
|
| 54 |
-
0.2,
|
| 55 |
-
0.1
|
| 56 |
-
],
|
| 57 |
-
"n_tokens": 200
|
| 58 |
-
},
|
| 59 |
-
"max_expected": 0.81,
|
| 60 |
-
"rejected": true
|
| 61 |
-
},
|
| 62 |
-
{
|
| 63 |
-
"attack": "A4_over_length_attack",
|
| 64 |
-
"description": "Generate a massive output to dilute detection. Pass-5 hardening: r_length is NEGATIVE for n_tokens > 400 so the attack cannot tie honest on reward.",
|
| 65 |
-
"reward": 0.85,
|
| 66 |
-
"breakdown": {
|
| 67 |
-
"match": 1.0,
|
| 68 |
-
"format": 1.0,
|
| 69 |
-
"length": -0.5,
|
| 70 |
-
"weights": [
|
| 71 |
-
0.7,
|
| 72 |
-
0.2,
|
| 73 |
-
0.1
|
| 74 |
-
],
|
| 75 |
-
"n_tokens": 500
|
| 76 |
-
},
|
| 77 |
-
"max_expected": 0.86,
|
| 78 |
-
"rejected": true
|
| 79 |
-
},
|
| 80 |
-
{
|
| 81 |
-
"attack": "A5_adjacent_tier_guess",
|
| 82 |
-
"description": "Always guess the adjacent tier to hedge. Ordinal proximity pays only 0.5 not 1.0 on match.",
|
| 83 |
-
"reward": 0.65,
|
| 84 |
-
"breakdown": {
|
| 85 |
-
"match": 0.5,
|
| 86 |
-
"format": 1.0,
|
| 87 |
-
"length": 1.0,
|
| 88 |
-
"weights": [
|
| 89 |
-
0.7,
|
| 90 |
-
0.2,
|
| 91 |
-
0.1
|
| 92 |
-
],
|
| 93 |
-
"n_tokens": 60
|
| 94 |
-
},
|
| 95 |
-
"max_expected": 0.66,
|
| 96 |
-
"rejected": true
|
| 97 |
-
},
|
| 98 |
-
{
|
| 99 |
-
"attack": "A6_wrong_tier_confident",
|
| 100 |
-
"description": "Always guess LOW — opposite end of ordinal scale. Far-from-GT reward must be 0.0, not just adjacent.",
|
| 101 |
-
"reward": 0.3,
|
| 102 |
-
"breakdown": {
|
| 103 |
-
"match": 0.0,
|
| 104 |
-
"format": 1.0,
|
| 105 |
-
"length": 1.0,
|
| 106 |
-
"weights": [
|
| 107 |
-
0.7,
|
| 108 |
-
0.2,
|
| 109 |
-
0.1
|
| 110 |
-
],
|
| 111 |
-
"n_tokens": 60
|
| 112 |
-
},
|
| 113 |
-
"max_expected": 0.31,
|
| 114 |
-
"rejected": true
|
| 115 |
-
}
|
| 116 |
-
],
|
| 117 |
-
"honest_baseline": {
|
| 118 |
-
"reward": 0.9,
|
| 119 |
-
"breakdown": {
|
| 120 |
-
"match": 1.0,
|
| 121 |
-
"format": 1.0,
|
| 122 |
-
"length": 0.0,
|
| 123 |
-
"weights": [
|
| 124 |
-
0.7,
|
| 125 |
-
0.2,
|
| 126 |
-
0.1
|
| 127 |
-
],
|
| 128 |
-
"n_tokens": 20
|
| 129 |
-
}
|
| 130 |
-
},
|
| 131 |
-
"verdict": "All attack vectors score strictly below an honest full answer. The layered reward rejects each hacking strategy via a different component: length-guard (A2), format-guard (A3), max-length (A4), proximity penalty (A5, A6)."
|
| 132 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "2011_Tōhoku_earthquake_and_tsunami",
|
| 3 |
+
"ground_truth": "CRITICAL",
|
| 4 |
+
"reward_formula": "0.7 * match + 0.2 * format + 0.1 * length",
|
| 5 |
+
"n_attacks": 6,
|
| 6 |
+
"n_rejected": 6,
|
| 7 |
+
"attacks": [
|
| 8 |
+
{
|
| 9 |
+
"attack": "A1_empty_string",
|
| 10 |
+
"description": "Degenerate empty output — no info at all.",
|
| 11 |
+
"reward": 0.0,
|
| 12 |
+
"breakdown": {
|
| 13 |
+
"match": 0.0,
|
| 14 |
+
"format": 0.0,
|
| 15 |
+
"length": 0.0,
|
| 16 |
+
"weights": [
|
| 17 |
+
0.7,
|
| 18 |
+
0.2,
|
| 19 |
+
0.1
|
| 20 |
+
],
|
| 21 |
+
"n_tokens": 1
|
| 22 |
+
},
|
| 23 |
+
"max_expected": 0.01,
|
| 24 |
+
"rejected": true
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"attack": "A2_risk_only_short_circuit",
|
| 28 |
+
"description": "Pure short-circuit: output the ground-truth label with no structure. Length-guard should reject.",
|
| 29 |
+
"reward": 0.7,
|
| 30 |
+
"breakdown": {
|
| 31 |
+
"match": 1.0,
|
| 32 |
+
"format": 0.0,
|
| 33 |
+
"length": 0.0,
|
| 34 |
+
"weights": [
|
| 35 |
+
0.7,
|
| 36 |
+
0.2,
|
| 37 |
+
0.1
|
| 38 |
+
],
|
| 39 |
+
"n_tokens": 1
|
| 40 |
+
},
|
| 41 |
+
"max_expected": 0.71,
|
| 42 |
+
"rejected": true
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"attack": "A3_long_spam_no_json",
|
| 46 |
+
"description": "Pad with junk to beat length-guard while omitting structured output. Format-guard should reject.",
|
| 47 |
+
"reward": 0.8,
|
| 48 |
+
"breakdown": {
|
| 49 |
+
"match": 1.0,
|
| 50 |
+
"format": 0.0,
|
| 51 |
+
"length": 1.0,
|
| 52 |
+
"weights": [
|
| 53 |
+
0.7,
|
| 54 |
+
0.2,
|
| 55 |
+
0.1
|
| 56 |
+
],
|
| 57 |
+
"n_tokens": 200
|
| 58 |
+
},
|
| 59 |
+
"max_expected": 0.81,
|
| 60 |
+
"rejected": true
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"attack": "A4_over_length_attack",
|
| 64 |
+
"description": "Generate a massive output to dilute detection. Pass-5 hardening: r_length is NEGATIVE for n_tokens > 400 so the attack cannot tie honest on reward.",
|
| 65 |
+
"reward": 0.85,
|
| 66 |
+
"breakdown": {
|
| 67 |
+
"match": 1.0,
|
| 68 |
+
"format": 1.0,
|
| 69 |
+
"length": -0.5,
|
| 70 |
+
"weights": [
|
| 71 |
+
0.7,
|
| 72 |
+
0.2,
|
| 73 |
+
0.1
|
| 74 |
+
],
|
| 75 |
+
"n_tokens": 500
|
| 76 |
+
},
|
| 77 |
+
"max_expected": 0.86,
|
| 78 |
+
"rejected": true
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"attack": "A5_adjacent_tier_guess",
|
| 82 |
+
"description": "Always guess the adjacent tier to hedge. Ordinal proximity pays only 0.5 not 1.0 on match.",
|
| 83 |
+
"reward": 0.65,
|
| 84 |
+
"breakdown": {
|
| 85 |
+
"match": 0.5,
|
| 86 |
+
"format": 1.0,
|
| 87 |
+
"length": 1.0,
|
| 88 |
+
"weights": [
|
| 89 |
+
0.7,
|
| 90 |
+
0.2,
|
| 91 |
+
0.1
|
| 92 |
+
],
|
| 93 |
+
"n_tokens": 60
|
| 94 |
+
},
|
| 95 |
+
"max_expected": 0.66,
|
| 96 |
+
"rejected": true
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"attack": "A6_wrong_tier_confident",
|
| 100 |
+
"description": "Always guess LOW — opposite end of ordinal scale. Far-from-GT reward must be 0.0, not just adjacent.",
|
| 101 |
+
"reward": 0.3,
|
| 102 |
+
"breakdown": {
|
| 103 |
+
"match": 0.0,
|
| 104 |
+
"format": 1.0,
|
| 105 |
+
"length": 1.0,
|
| 106 |
+
"weights": [
|
| 107 |
+
0.7,
|
| 108 |
+
0.2,
|
| 109 |
+
0.1
|
| 110 |
+
],
|
| 111 |
+
"n_tokens": 60
|
| 112 |
+
},
|
| 113 |
+
"max_expected": 0.31,
|
| 114 |
+
"rejected": true
|
| 115 |
+
}
|
| 116 |
+
],
|
| 117 |
+
"honest_baseline": {
|
| 118 |
+
"reward": 0.9,
|
| 119 |
+
"breakdown": {
|
| 120 |
+
"match": 1.0,
|
| 121 |
+
"format": 1.0,
|
| 122 |
+
"length": 0.0,
|
| 123 |
+
"weights": [
|
| 124 |
+
0.7,
|
| 125 |
+
0.2,
|
| 126 |
+
0.1
|
| 127 |
+
],
|
| 128 |
+
"n_tokens": 20
|
| 129 |
+
}
|
| 130 |
+
},
|
| 131 |
+
"verdict": "All attack vectors score strictly below an honest full answer. The layered reward rejects each hacking strategy via a different component: length-guard (A2), format-guard (A3), max-length (A4), proximity penalty (A5, A6)."
|
| 132 |
}
|