<!--
Open_SLM_Leaderboard / index.html
Datdanboi25's picture
added kirk-tung
dabc2bd
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Open SLM Leaderboard</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,400&family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
<style>
:root {
--bg: #0a0a0b;
--bg-alt: #111113;
--surface: #141416;
--surface-hover: #1a1a1d;
--border: #232326;
--border-subtle: #19191c;
--border-strong: #2e2e32;
--text: #e8e8e8;
--text-secondary: #6b6b6e;
--text-muted: #454548;
--accent: #c2b6ff;
--accent-muted: #7a6fb0;
--accent-soft: rgba(194, 182, 255, 0.08);
--best: #4ade80;
--worst: #454548;
}
* { margin: 0; padding: 0; box-sizing: border-box; }
html { scroll-behavior: smooth; }
body {
font-family: 'DM Sans', system-ui, -apple-system, sans-serif;
background: var(--bg);
color: var(--text);
line-height: 1.6;
-webkit-font-smoothing: antialiased;
}
.container { max-width: 1100px; margin: 0 auto; padding: 0 40px; }
/* ─── Hero ─── */
.hero { padding: 80px 0 60px; text-align: center; }
.hero h1 {
font-size: clamp(44px, 7vw, 80px);
font-weight: 300;
letter-spacing: -3px;
line-height: 1.0;
}
.hero h1 span { color: var(--accent); }
.hero-sub {
font-size: 17px;
color: var(--text-secondary);
margin-top: 18px;
max-width: 520px;
margin-left: auto;
margin-right: auto;
font-weight: 400;
letter-spacing: -0.2px;
}
.hero-sub a {
color: var(--accent);
text-decoration: none;
border-bottom: 1px solid transparent;
transition: border-color 0.15s;
}
.hero-sub a:hover {
border-bottom-color: var(--accent);
}
.hero-note {
margin-top: 40px;
background: var(--surface);
border: 1px solid var(--border-subtle);
border-left: 3px solid var(--accent);
border-radius: 0 10px 10px 0;
padding: 20px 28px;
text-align: left;
font-size: 14px;
color: var(--text-secondary);
line-height: 1.7;
}
.hero-note strong { color: var(--text); font-weight: 600; }
.hero-note a {
color: var(--accent);
text-decoration: none;
border-bottom: 1px solid transparent;
transition: border-color 0.15s;
}
.hero-note a:hover { border-bottom-color: var(--accent); }
/* ─── Section Headings ─── */
.section-title {
font-size: clamp(36px, 5vw, 56px);
font-weight: 300;
letter-spacing: -2px;
line-height: 1.05;
margin-bottom: 8px;
}
.section-sub {
font-size: 14px;
color: var(--text-secondary);
margin-bottom: 24px;
font-weight: 400;
}
/* ─── Highlights ─── */
.insight-grid {
display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 12px;
}
.insight-card {
min-height: 118px;
background: var(--surface);
border: 1px solid var(--border-subtle);
border-top: 2px solid var(--border-strong);
border-radius: 8px;
padding: 16px;
}
.insight-card:hover {
background: var(--surface-hover);
border-color: var(--border);
}
.insight-label {
display: block;
font-family: 'Space Mono', monospace;
font-size: 10px;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--text-muted);
margin-bottom: 10px;
}
.insight-value {
display: block;
color: var(--text);
font-size: 20px;
font-weight: 500;
line-height: 1.1;
letter-spacing: -0.4px;
overflow-wrap: anywhere;
}
a.insight-value {
text-decoration: none;
transition: color 0.15s;
}
a.insight-value:hover {
color: var(--accent);
}
.insight-value.stat {
font-family: 'Space Mono', monospace;
font-size: 28px;
letter-spacing: -1px;
}
.insight-score {
display: block;
margin-top: 10px;
color: var(--best);
font-family: 'Space Mono', monospace;
font-size: 12px;
font-weight: 700;
}
.insight-meta {
display: block;
margin-top: 4px;
color: var(--text-muted);
font-size: 11px;
line-height: 1.4;
}
.insight-meta a {
color: var(--text-muted);
text-decoration: none;
border-bottom: 1px solid transparent;
}
.insight-meta a:hover {
color: var(--accent-muted);
border-bottom-color: var(--accent-muted);
}
/* ─── Param Filters ─── */
.filter-bar {
display: flex;
gap: 8px;
margin-bottom: 12px;
flex-wrap: wrap;
}
.filter-group {
margin-bottom: 18px;
}
.filter-label {
display: block;
font-family: 'Space Mono', monospace;
font-size: 10px;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--text-muted);
margin-bottom: 8px;
}
.filter-btn {
font-family: 'Space Mono', monospace;
font-size: 11px;
font-weight: 400;
letter-spacing: 0.02em;
padding: 5px 12px;
border-radius: 6px;
border: 1px solid var(--border);
background: transparent;
color: var(--text-secondary);
cursor: pointer;
transition: all 0.15s;
}
.filter-btn:hover {
border-color: var(--accent-muted);
color: var(--text);
}
.filter-btn.active {
border-color: var(--accent);
background: var(--accent-soft);
color: var(--accent);
}
/* ─── Table ─── */
.table-wrap {
overflow-x: auto;
border: 1px solid var(--border-subtle);
border-radius: 10px;
background: var(--surface);
}
table { width: 100%; border-collapse: collapse; font-size: 14px; }
thead th {
padding: 12px 12px;
text-align: center;
font-size: 10px;
font-weight: 600;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--text-muted);
border-bottom: 1px solid var(--border);
cursor: pointer;
user-select: none;
white-space: nowrap;
transition: color 0.15s;
font-family: 'Space Mono', monospace;
}
thead th:hover { color: var(--accent); }
thead th:first-child, tbody td:first-child { text-align: left; }
thead th .sort-indicator {
margin-left: 3px;
opacity: 0;
font-size: 9px;
color: var(--accent);
}
thead th.sorted { color: var(--accent); }
thead th.sorted .sort-indicator { opacity: 1; }
tbody tr {
border-bottom: 1px solid var(--border-subtle);
transition: background 0.12s;
}
tbody tr:last-child { border-bottom: none; }
tbody tr:hover { background: var(--accent-soft); }
tbody td {
padding: 12px 12px;
text-align: center;
vertical-align: middle;
}
.td-rank {
font-weight: 700;
font-size: 12px;
color: var(--accent-muted);
width: 32px;
font-family: 'Space Mono', monospace;
}
.org-title-line {
display: inline-flex;
align-items: center;
gap: 8px;
}
.rank-move {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 30px;
height: 17px;
padding: 0 5px;
border-radius: 4px;
border: 1px solid var(--border-subtle);
color: var(--text-muted);
background: rgba(255,255,255,0.03);
font-size: 9px;
line-height: 1;
}
.rank-move.up {
color: var(--best);
border-color: rgba(74,222,128,0.28);
background: rgba(74,222,128,0.08);
}
.rank-move.down {
color: #f87171;
border-color: rgba(248,113,113,0.26);
background: rgba(248,113,113,0.07);
}
.rank-move.new {
color: var(--accent-muted);
border-color: rgba(194,182,255,0.30);
background: rgba(194,182,255,0.08);
}
.td-model {
text-align: left;
font-weight: 500;
font-size: 14px;
color: var(--text);
}
.td-model a {
color: var(--text);
text-decoration: none;
transition: color 0.15s;
}
.td-model a:hover { color: var(--accent); }
.td-model .model-org {
display: block;
font-size: 11px;
color: var(--text-muted);
font-weight: 400;
margin-top: 1px;
}
.td-model .model-org a {
color: var(--text-muted);
text-decoration: none;
transition: color 0.15s, border-color 0.15s;
border-bottom: 1px solid transparent;
}
.td-model .model-org a:hover {
color: var(--accent-muted);
border-bottom-color: var(--accent-muted);
}
.td-score {
font-family: 'Space Mono', monospace;
font-size: 12px;
letter-spacing: -0.2px;
}
.td-score.best {
color: var(--best);
font-weight: 700;
}
.td-score.na {
color: var(--text-muted);
font-style: normal;
}
.td-params {
font-family: 'Space Mono', monospace;
font-size: 11px;
color: var(--text-secondary);
}
.org-badge {
display: inline-block;
font-size: 10px;
font-weight: 600;
padding: 2px 7px;
border-radius: 4px;
letter-spacing: 0.03em;
font-family: 'Space Mono', monospace;
}
/* ─── Scatter Plot ─── */
.legend-bar {
display: flex;
gap: 20px;
margin-bottom: 24px;
flex-wrap: wrap;
}
.legend-item {
display: flex;
align-items: center;
gap: 6px;
font-size: 12px;
color: var(--text-secondary);
}
.legend-dot { width: 8px; height: 8px; border-radius: 50%; }
.chart-card {
background: var(--surface);
border: 1px solid var(--border-subtle);
border-radius: 10px;
padding: 20px;
}
.chart-card h3 {
font-size: 12px;
font-weight: 500;
color: var(--text-secondary);
margin-bottom: 14px;
letter-spacing: 0.02em;
}
.chart-card canvas {
max-height: 460px;
}
/* ─── About ─── */
.about-box {
background: var(--surface);
border: 1px solid var(--border-subtle);
border-left: 3px solid var(--accent);
border-radius: 0 10px 10px 0;
padding: 28px 32px;
}
.about-box h3 {
font-size: 20px;
font-weight: 500;
margin-bottom: 10px;
color: var(--text);
letter-spacing: -0.5px;
}
.about-box p {
font-size: 14px;
color: var(--text-secondary);
line-height: 1.7;
}
.about-box a {
color: var(--accent);
text-decoration: none;
border-bottom: 1px solid transparent;
transition: border-color 0.15s;
}
.about-box a:hover { border-bottom-color: var(--accent); }
/* ─── Footer ─── */
footer {
margin-top: 100px;
padding: 32px 0;
border-top: 1px solid var(--border-subtle);
display: flex;
justify-content: space-between;
align-items: center;
font-size: 11px;
color: var(--text-muted);
font-family: 'Space Mono', monospace;
}
footer a {
color: var(--text-secondary);
text-decoration: none;
margin-left: 16px;
transition: color 0.15s;
}
footer a:hover { color: var(--accent); }
/* ─── Mobile ─── */
@media (max-width: 768px) {
.container { padding: 0 20px; }
.hero { padding: 60px 0 40px; }
.insight-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); }
.chart-card canvas { max-height: 360px; }
footer { flex-direction: column; gap: 12px; text-align: center; }
footer a { margin: 0 8px; }
}
@media (max-width: 520px) {
.insight-grid { grid-template-columns: 1fr; }
}
</style>
<base target="_blank">
</head>
<body>
<div class="container">
<!-- HERO -->
<section class="hero">
<h1>Open <span>SLM Leaderboard</span></h1>
<p class="hero-sub">
A leaderboard for sub-150M parameter language models, evaluated with the LM-eval harness
or a custom benchmark script, available at
<a href="https://huggingface.co/datasets/axiomiclabs/Arithmark-2.0" target="_blank" rel="noopener noreferrer">Arithmark-2.0</a>.
</p>
</section>
<!-- HIGHLIGHTS -->
<section id="highlights" style="padding-top:20px;">
<div class="insight-grid" id="insight-grid"></div>
</section>
<!-- LEADERBOARD TABLE -->
<section id="leaderboard" style="padding-top:40px;">
<h2 class="section-title">Leaderboard</h2>
<p class="section-sub">Zero-shot evaluation. Higher is better for all columns. Click any header to sort.</p>
<div class="filter-group">
<span class="filter-label">Model size</span>
<div class="filter-bar" id="filter-bar"></div>
</div>
<div class="table-wrap">
<table id="leaderboard-table">
<thead>
  <!-- Sortable column headers: each cell's onclick passes its sort key to sortTable(). -->
  <tr>
    <th scope="col" onclick="sortTable('rank')"># <span class="sort-indicator">▼</span></th>
    <th scope="col" onclick="sortTable('name')">Model <span class="sort-indicator"></span></th>
    <th scope="col" onclick="sortTable('params')">Params <span class="sort-indicator"></span></th>
    <th scope="col" onclick="sortTable('avg')">Avg <span class="sort-indicator"></span></th>
    <th scope="col" onclick="sortTable('hellaswag')">HellaSwag <span class="sort-indicator"></span></th>
    <th scope="col" onclick="sortTable('arc')">ARC-Easy <span class="sort-indicator"></span></th>
    <th scope="col" onclick="sortTable('arcChall')">ARC-Challenge <span class="sort-indicator"></span></th>
    <th scope="col" onclick="sortTable('piqa')">PIQA <span class="sort-indicator"></span></th>
    <th scope="col" onclick="sortTable('arithmark2')">ArithMark-2 <span class="sort-indicator"></span></th>
  </tr>
</thead>
<tbody id="leaderboard-body"></tbody>
</table>
</div>
</section>
<!-- BAR CHART -->
<section id="score-chart" style="padding-top:80px;">
<h2 class="section-title">Scores</h2>
<p class="section-sub" id="score-chart-sub">Top scores for the active size and benchmark filters.</p>
<div class="filter-group">
<span class="filter-label">Benchmark</span>
<div class="filter-bar" id="benchmark-filter-bar"></div>
</div>
<div class="chart-card">
<h3>Top Avg Scores</h3>
<canvas id="barChart"></canvas>
</div>
</section>
<!-- SCATTER PLOT -->
<section id="charts" style="padding-top:80px;">
<h2 class="section-title">Efficiency</h2>
<p class="section-sub" id="efficiency-sub">Score vs parameter count (log scale). Shaded zone = above regression line.</p>
<div class="legend-bar" id="legend-bar"></div>
<div class="chart-card">
<h3>Avg Score vs Log Parameters</h3>
<canvas id="scatterChart"></canvas>
</div>
</section>
<!-- ORG PERFORMANCE -->
<section id="org-performance" style="padding-top:80px;">
<h2 class="section-title">Org Leaderboard</h2>
<p class="section-sub" id="org-leaderboard-sub">Average standard deviations above or below the score-vs-size fit line.</p>
<div class="table-wrap">
<table id="org-leaderboard-table">
<thead>
<tr>
<th>#</th>
<th>Organization</th>
<th>Models</th>
<th>Fit Std Devs</th>
<th>Mean Avg</th>
<th>Best Model vs Fit</th>
</tr>
</thead>
<tbody id="org-leaderboard-body"></tbody>
</table>
</div>
</section>
<!-- ABOUT / ADD MODEL -->
<section id="about" style="padding-top:80px;">
  <!-- Submission instructions; the link leaves the page, so it opens in a new tab with rel="noopener noreferrer". -->
  <div class="about-box">
    <h3>Add your model</h3>
    <p>Open a PR on this Space with your model's results for the given benchmarks. They will be independently verified by our team and then your PR will be merged. Your model must be open weights to qualify. <a href="https://huggingface.co/spaces/AxiomicLabs/Open_SLM_Leaderboard/discussions" target="_blank" rel="noopener noreferrer">Open a PR →</a></p>
  </div>
</section>
<!-- FOOTER -->
<footer>
  <div>Open SLM Leaderboard by <a href="https://huggingface.co/AxiomicLabs" rel="noopener noreferrer" style="margin:0;">Axiomic Labs</a></div>
  <!-- In-page section links: target="_self" overrides the document-wide <base target="_blank">
       so these scroll to the section instead of opening a new tab. -->
  <div>
    <a href="#leaderboard" target="_self">Leaderboard</a>
    <a href="#score-chart" target="_self">Scores</a>
    <a href="#charts" target="_self">Efficiency</a>
    <a href="#org-performance" target="_self">Organizations</a>
  </div>
  <div style="font-size:10px;">All results independently verified using our internal verification process.</div>
</footer>
</div>
<script>
// ═══════════════════════════════════════════════════════════════
// ═══ CENTRALIZED DATA — EDIT HERE TO ADD/UPDATE MODELS & ORGS
// ═══════════════════════════════════════════════════════════════
// Organization registry, keyed by the identifier that MODELS entries carry in
// their `org` field. Each value holds:
//   name        – display name of the organization
//   chartColor  – rgba() color string (every entry uses 0.70 alpha)
//   chartBorder – hex color with the same RGB components as chartColor
//   url         – Hugging Face profile URL of the organization
// NOTE(review): chartColor/chartBorder are presumably the fill and outline
// colors used by the charts; the consuming code is outside this view.
const ORGS = {
compactai: { name: 'CompactAI', chartColor: 'rgba(63, 185, 80, 0.70)', chartBorder: '#3fb950', url: 'https://huggingface.co/CompactAI-O' },
supralabs: { name: 'SupraLabs', chartColor: 'rgba(124, 58, 237, 0.70)', chartBorder: '#7c3aed', url: 'https://huggingface.co/SupraLabs' },
axiomiclabs: { name: 'Axiomic Labs', chartColor: 'rgba(194, 182, 255, 0.70)', chartBorder: '#c2b6ff', url: 'https://huggingface.co/AxiomicLabs' },
mihaipopa: { name: 'Mihai Popa', chartColor: 'rgba(247, 129, 102, 0.70)', chartBorder: '#f78166', url: 'https://huggingface.co/MihaiPopa-1' },
lhtechai: { name: 'LH-Tech-AI', chartColor: 'rgba(249, 115, 22, 0.70)', chartBorder: '#f97316', url: 'https://huggingface.co/LH-Tech-AI' },
facebook: { name: 'Facebook', chartColor: 'rgba(24, 119, 242, 0.70)', chartBorder: '#1877f2', url: 'https://huggingface.co/facebook' },
harleyml: { name: 'Harley ML', chartColor: 'rgba(153, 27, 27, 0.70)', chartBorder: '#991b1b', url: 'https://huggingface.co/Harley-ml' },
huggingface: { name: 'HuggingFace', chartColor: 'rgba(255, 204, 0, 0.70)', chartBorder: '#ffcc00', url: 'https://huggingface.co/HuggingFaceTB' },
eleutherai: { name: 'EleutherAI', chartColor: 'rgba(239, 68, 68, 0.70)', chartBorder: '#ef4444', url: 'https://huggingface.co/EleutherAI' },
openai: { name: 'OpenAI', chartColor: 'rgba(16, 163, 127, 0.70)', chartBorder: '#10a37f', url: 'https://huggingface.co/openai-community' },
stentor: { name: 'StentorLabs', chartColor: 'rgba(255, 107, 203, 0.70)', chartBorder: '#ff6bcb', url: 'https://huggingface.co/StentorLabs' },
eclipsesenpai: { name: 'Eclipse-Senpai', chartColor: 'rgba(6, 182, 212, 0.70)', chartBorder: '#06b6d4', url: 'https://huggingface.co/Eclipse-Senpai' },
godelev: { name: 'GODELEV', chartColor: 'rgba(79, 70, 229, 0.70)', chartBorder: '#4f46e5', url: 'https://huggingface.co/godelev' },
sandroeth: { name: 'Sandroeth', chartColor: 'rgba(132, 204, 22, 0.70)', chartBorder: '#84cc16', url: 'https://huggingface.co/Sandroeth' },
veyraai: { name: 'veyra-ai', chartColor: 'rgba(14, 165, 233, 0.70)', chartBorder: '#0ea5e9', url: 'https://huggingface.co/veyra-ai' },
thingai: { name: 'ThingAI', chartColor: 'rgba(180, 83, 9, 0.70)', chartBorder: '#b45309', url: 'https://huggingface.co/ThingAI' },
fromzero: { name: 'FromZero', chartColor: 'rgba(210, 180, 140, 0.70)', chartBorder: '#d2b48c', url: 'https://huggingface.co/fromziro' },
finnianx: { name: 'finnianx', chartColor: 'rgba(45, 212, 191, 0.70)', chartBorder: '#2dd4bf', url: 'https://huggingface.co/finnianx' },
joelhenwang: { name: 'joelhenwang', chartColor: 'rgba(229, 231, 235, 0.70)', chartBorder: '#e5e7eb', url: 'https://huggingface.co/joelhenwang' },
rtc2022: { name: 'RTC', chartColor: 'rgba(245, 158, 11, 0.70)', chartBorder: '#f59e0b', url: 'https://huggingface.co/rtc2022' },
};
const MODELS = [
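// ── Models with full benchmark data ──
// Entry fields: name, org (a key of ORGS), params (raw parameter count),
// paramsDisplay (short label for params), arc / hellaswag / piqa / arcChall /
// arithmark2 (benchmark scores), links.card (model card URL).
// The entry with `type: 'orgMovementCutoff'` is a marker and carries no model data.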
{ name: 'SmolLM2-135M', org: 'huggingface', params: 135000000, paramsDisplay: '135M', arc: 58.63, hellaswag: 43.22, piqa: 68.44, arcChall: 29.69, arithmark2: 32.68, links: { card: 'https://huggingface.co/HuggingFaceTB/SmolLM2-135M' } },
{ name: 'GPT-X2-125M', org: 'axiomiclabs', params: 125000000, paramsDisplay: '125M', arc: 51.47, hellaswag: 40.41, piqa: 67.30, arcChall: 27.82, arithmark2: 30.68, links: { card: 'https://huggingface.co/AxiomicLabs/GPT-X2-125M' } },
{ name: 'GPT-X-125M', org: 'axiomiclabs', params: 125000000, paramsDisplay: '125M', arc: 50.76, hellaswag: 36.57, piqa: 64.96, arcChall: 26.62, arithmark2: 30.24, links: { card: 'https://huggingface.co/AxiomicLabs/GPT-X-125M' } },
{ name: 'MobileLLM-R1-140M-base', org: 'facebook', params: 140000000, paramsDisplay: '140M', arc: 49.92, hellaswag: 33.84, piqa: 63.22, arcChall: 24.74, arithmark2: 53.56, links: { card: 'https://huggingface.co/facebook/MobileLLM-R1-140M-base' } },
{ name: 'Supra-50M-Base', org: 'supralabs', params: 51786240, paramsDisplay: '52M', arc: 45.88, hellaswag: 31.83, piqa: 62.51, arcChall: 25.00, arithmark2: 27.04, links: { card: 'https://huggingface.co/SupraLabs/Supra-50M-Base' } },
{ name: 'Shard-1', org: 'compactai', params: 54500000, paramsDisplay: '54.5M', arc: 41.12, hellaswag: 29.20, piqa: 58.22, arcChall: 20.99, arithmark2: 26.80, links: { card: 'https://huggingface.co/CompactAI-O/Shard-1' } },
{ name: 'Supra-50M-Instruct', org: 'supralabs', params: 51786240, paramsDisplay: '52M', arc: 44.40, hellaswag: 29.09, piqa: 59.47, arcChall: 27.30, arithmark2: 29.12, links: { card: 'https://huggingface.co/SupraLabs/Supra-50M-Instruct' } },
{ name: 'Supra-50M-Reasoning', org: 'supralabs', params: 51786240, paramsDisplay: '52M', arc: 44.44, hellaswag: 29.10, piqa: 59.30, arcChall: 27.39, arithmark2: 28.96, links: { card: 'https://huggingface.co/SupraLabs/Supra-50M-Reasoning' } },
{ name: 'SmolLM-135M', org: 'huggingface', params: 135000000, paramsDisplay: '135M', arc: 56.31, hellaswag: 42.70, piqa: 68.28, arcChall: 29.01, arithmark2: 28.84, links: { card: 'https://huggingface.co/HuggingFaceTB/SmolLM-135M' } },
{ name: 'OPT-125M', org: 'facebook', params: 125000000, paramsDisplay: '125M', arc: 40.28, hellaswag: 31.31, piqa: 62.24, arcChall: 22.70, arithmark2: 24.40, links: { card: 'https://huggingface.co/facebook/opt-125m' } },
{ name: 'GPT-S-5M', org: 'axiomiclabs', params: 5160000, paramsDisplay: '5.2M', arc: 33.21, hellaswag: 27.46, piqa: 57.24, arcChall: 21.16, arithmark2: 27.12, links: { card: 'https://huggingface.co/AxiomicLabs/GPT-S-5M' } },
{ name: 'GPT-2', org: 'openai', params: 124000000, paramsDisplay: '124M', arc: 39.35, hellaswag: 31.26, piqa: 62.08, arcChall: 22.35, arithmark2: 26.48, links: { card: 'https://huggingface.co/openai-community/gpt2' } },
{ name: 'Spark-5M-Base-v4', org: 'lhtechai', params: 5000000, paramsDisplay: '5M', arc: 33.16, hellaswag: 27.03, piqa: 53.32, arcChall: 21.50, arithmark2: 25.00, links: { card: 'https://huggingface.co/LH-Tech-AI/Spark-5M-Base-v4' } },
{ name: 'Supra-Mini-v5-8M', org: 'supralabs', params: 7870000, paramsDisplay: '7.87M', arc: 33.21, hellaswag: 26.37, piqa: 54.03, arcChall: 21.16, arithmark2: 24.28, links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v5-8M' } },
{ name: 'Pythia-70M', org: 'eleutherai', params: 70000000, paramsDisplay: '70M', arc: 31.65, hellaswag: 27.49, piqa: 53.48, arcChall: 23.63, arithmark2: 25.32, links: { card: 'https://huggingface.co/EleutherAI/pythia-70m' } },
{ name: 'Supra-Mini-v4-2M', org: 'supralabs', params: 2620000, paramsDisplay: '2.6M', arc: 30.98, hellaswag: 25.52, piqa: 51.90, arcChall: 21.50, arithmark2: 24.08, links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v4-2M' } },
{ name: 'Pythia-31M', org: 'eleutherai', params: 31000000, paramsDisplay: '31M', arc: 33.88, hellaswag: 27.14, piqa: 56.26, arcChall: 21.67, arithmark2: 27.20, links: { card: 'https://huggingface.co/EleutherAI/pythia-31m' } },
{ name: 'Stentor3-50M', org: 'stentor', params: 50000000, paramsDisplay: '50M', arc: 29.67, hellaswag: 27.10, piqa: 53.75, arcChall: 21.67, arithmark2: 29.48, links: { card: 'https://huggingface.co/StentorLabs/Stentor3-50M' } },
{ name: 'Stentor3-20M', org: 'stentor', params: 20000000, paramsDisplay: '20M', arc: 29.50, hellaswag: 27.06, piqa: 55.06, arcChall: 23.12, arithmark2: 26.72, links: { card: 'https://huggingface.co/StentorLabs/Stentor3-20M' } },
{ name: 'Portimbria-150M', org: 'stentor', params: 151026432, paramsDisplay: '151M', arc: 35.82, hellaswag: 27.09, piqa: 58.27, arcChall: 18.77, arithmark2: 28.04, links: { card: 'https://huggingface.co/StentorLabs/Portimbria-150M' } },
{ name: 'nanowhale-100m-base', org: 'huggingface', params: 100000000, paramsDisplay: '100M', arc: 28.79, hellaswag: 26.31, piqa: 51.80, arcChall: 24.83, arithmark2: 25.20, links: { card: 'https://huggingface.co/HuggingFaceTB/nanowhale-100m-base' } },
{ name: 'Pythia-14M', org: 'eleutherai', params: 14000000, paramsDisplay: '14M', arc: 32.28, hellaswag: 26.20, piqa: 55.88, arcChall: 20.99, arithmark2: 27.04, links: { card: 'https://huggingface.co/EleutherAI/pythia-14m' } },
{ name: 'Tenete-8M', org: 'harleyml', params: 8000000, paramsDisplay: '8M', arc: 31.69, hellaswag: 26.75, piqa: 55.66, arcChall: 21.84, arithmark2: 26.72, links: { card: 'https://huggingface.co/Harley-ml/Tenete-8M' } },
{ name: 'Dillion-1.2M', org: 'harleyml', params: 1281384, paramsDisplay: '1.3M', arc: 31.19, hellaswag: 26.65, piqa: 53.05, arcChall: 22.78, arithmark2: 24.80, links: { card: 'https://huggingface.co/Harley-ml/Dillion-1.2M' } },
{ name: 'CinnabarLM-1.4M-Base', org: 'mihaipopa', params: 1510000, paramsDisplay: '1.5M', arc: 28.54, hellaswag: 27.08, piqa: 52.50, arcChall: 23.38, arithmark2: 24.96, links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.4M-Base' } },
{ name: 'CinnabarLM-4M-Base', org: 'mihaipopa', params: 4230000, paramsDisplay: '4.2M', arc: 28.28, hellaswag: 27.71, piqa: 52.29, arcChall: 22.70, arithmark2: 24.96, links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-4M-Base' } },
{ name: 'CinnabarLM-1.5M-Base', org: 'mihaipopa', params: 1710000, paramsDisplay: '1.7M', arc: 28.11, hellaswag: 27.08, piqa: 52.94, arcChall: 21.93, arithmark2: 25.20, links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.5M-Base' } },
{ name: 'Dillionv2-1.3M', org:'harleyml', params: 1285200, paramsDisplay: '1.3M', arc: 29.71, hellaswag: 27.27, piqa: 53.05, arcChall: 22.44, arithmark2: 27.00, links: { card: 'https://huggingface.co/Harley-ml/Dillionv2-1.3M' } },
{ name: 'KeyLM-75M', org:'eclipsesenpai',params: 75251200, paramsDisplay: '75M', arc: 35.73, hellaswag: 29.66, piqa: 60.50, arcChall: 23.98, arithmark2: 25.80, links: { card: 'https://huggingface.co/Eclipse-Senpai/KeyLM-75M' } },
{ name: 'Supra-Mini-v6-1M', org: 'supralabs', params: 1410688, paramsDisplay: '1.4M', arc: 30.68, hellaswag: 27.23, piqa: 53.70, arcChall: 20.48, arithmark2: 26.48, links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v6-1M' } },
{ name: 'GPT-S-1.4M', org: 'axiomiclabs', params: 1426000, paramsDisplay: '1.4M', arc: 31.57, hellaswag: 26.89, piqa: 55.17, arcChall: 21.93, arithmark2: 25.16, links: { card: 'https://huggingface.co/AxiomicLabs/GPT-S-1.4M' } },
{ name: 'Archaea-74M', org: 'godelev', params: 74016256, paramsDisplay: '74M', arc: 39.06, hellaswag: 27.27, piqa: 58.27, arcChall: 22.70, arithmark2: 29.20, links: { card: 'https://huggingface.co/GODELEV/Archaea-74M' } },
{ name: 'Cali-0.1B', org: 'sandroeth', params: 123782400, paramsDisplay: '124M', arc: 27.53, hellaswag: 26.84, piqa: 52.12, arcChall: 24.49, arithmark2: 24.72, links: { card: 'https://huggingface.co/Sandroeth/cali-0.1B' } },
{ name: 'Quark-50M', org: 'thingai', params: 56666496, paramsDisplay: '57M', arc: 36.78, hellaswag: 28.48, piqa: 57.83, arcChall: 25.00, arithmark2: 28.20, links: { card: 'https://huggingface.co/ThingAI/Quark-50m' } },
{ name: 'Quark-135M', org: 'thingai', params: 134561088, paramsDisplay: '135M', arc: 47.73, hellaswag: 31.33, piqa: 58.32, arcChall: 28.24, arithmark2: 40.32, links: { card: 'https://huggingface.co/ThingAI/Quark-135m' } },
{ name: 'Veyra-30M-Base', org: 'veyraai', params: 34611712, paramsDisplay: '35M', arc: 35.90, hellaswag: 27.92, piqa: 58.92, arcChall: 24.15, arithmark2: 26.76, links: { card: 'https://huggingface.co/veyra-ai/veyra-30m-base-5b-tokens' } },
{ name: 'Syn-2.6M', org: 'fromzero', params: 2604210, paramsDisplay: '2.6M', arc: 32.03, hellaswag: 26.96, piqa: 53.65, arcChall: 20.39, arithmark2: 26.68, links: { card: 'https://huggingface.co/fromziro/Syn-2.6M' } },
{ type: 'orgMovementCutoff', label: 'Org leaderboard movement cutoff' },
{ name: 'Ant-5M', org: 'godelev', params: 4713344, paramsDisplay: '4.7M', arc: 26.35, hellaswag: 25.99, piqa: 48.57, arcChall: 25.77, arithmark2: 24.80, links: { card: 'https://huggingface.co/GODELEV/Ant-5m' } },
{ name: 'Er-13M', org: 'fromzero', params: 12497520, paramsDisplay: '13M', arc: 35.10, hellaswag: 28.50, piqa: 57.51, arcChall: 20.73, arithmark2: 30.88, links: { card: 'https://huggingface.co/fromziro/Er-13M' } },
{ name: 'michel-tiny', org: 'finnianx', params: 55719040, paramsDisplay: '56M', arc: 37.37, hellaswag: 28.15, piqa: 57.34, arcChall: 21.76, arithmark2: 25.28, links: { card: 'https://huggingface.co/finnianx/michel-tiny' } },
{ name: 'OdinNext-138M-Base', org: 'joelhenwang', params: 138449696, paramsDisplay: '138M', arc: 45.08, hellaswag: 28.09, piqa: 59.52, arcChall: 23.81, arithmark2: 36.84, links: { card: 'https://huggingface.co/joelhenwang/OdinNext-138M-Base' } },
{ name: 'OdinNext-138M-Instruct', org: 'joelhenwang', params: 138451232, paramsDisplay: '138M', arc: 44.40, hellaswag: 28.86, piqa: 58.65, arcChall: 23.12, arithmark2: 36.56, links: { card: 'https://huggingface.co/joelhenwang/OdinNext-138M-Instruct' } },
{ name: 'michel-micro', org: 'finnianx', params: 28355072, paramsDisplay: '28M', arc: 38.51, hellaswag: 28.16, piqa: 57.62, arcChall: 23.29, arithmark2: 26.04, links: { card: 'https://huggingface.co/finnianx/michel-micro' } },
{ name: 'kirk-tung', org: 'rtc2022', params: 53111296, paramsDisplay: '53M', arc: 30.43, hellaswag: 26.32, piqa: 52.61, arcChall: 22.01, arithmark2: 24.88, links: { card: 'https://huggingface.co/rtc2022/kirk-tung' } },
// ── Models with partial benchmark data ──
// { name: 'Glint-1.3 (merged)', org: 'compactai', params: 982000, paramsDisplay: '982K', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: 24.68, status: 'released', description: 'Merged variant of the Glint-1 sub-1M model family. ArithMark-2 evaluated.', links: { card: 'https://huggingface.co/CompactAI-O/Glint-1.3' } },
//{ name: 'PotentSulfurLM-500K-Base', org: 'mihaipopa', params: 500000, paramsDisplay: '500K', arc: 26.81, hellaswag: 27.36, piqa: 51.58, arcChall: 22.87, arithmark2: 25.80, links: { card: 'https://huggingface.co/MihaiPopa-1/PotentSulfurLM-500K-Base' } },
//{ name: 'veyra3-5m-base', org: 'veyraai', params: 4496384, paramsDisplay: '4.5M', arc: 25.84, hellaswag: 25.44, piqa: 52.77, arcChall: 20.39, arithmark2: 24.72, links: { card: 'https://huggingface.co/veyra-ai/veyra3-5m-base' } },
// ── Models without benchmark data (pending) ──
// { name: 'Glint-1', org: 'compactai', params: 1000000, paramsDisplay: '1M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: 'CompactAI flagship sub-1M model. Trained on 100B tokens with progressive data curriculum.', links: { card: 'https://huggingface.co/CompactAI-O/Glint-1' } },
// { name: 'CinnabarLM 4M', org: 'mihaipopa', params: 4230000, paramsDisplay: '4.23M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: '4M parameter base model with strong perplexity results.', links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-4M-Base' } },
// { name: 'CinnabarLM 1.5M', org: 'mihaipopa', params: 1710000, paramsDisplay: '1.71M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: '1.5M parameter variant of the CinnabarLM family.', links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.5M-Base' } },
// { name: 'CinnabarLM 1.4M', org: 'mihaipopa', params: 1510000, paramsDisplay: '1.51M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: 'Smallest CinnabarLM variant with competitive BLiMP scores.', links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.4M-Base' } },
// { name: 'Glint-0.4', org: 'compactai', params: 1000000, paramsDisplay: '1M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: 'Earlier Glint iteration trained on 10B tokens.', links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.4' } },
// { name: 'Supra-Mini-v3', org: 'supralabs', params: 468000, paramsDisplay: '468K', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: 'Sub-500K model showing strong linguistic capabilities.', links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v3-0.5M' } },
// { name: 'Supra-Mini-v2', org: 'supralabs', params: 168000, paramsDisplay: '168K', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: 'Sub-200K exploration.', links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v2-0.1M' } },
// { name: 'Glint-0.2', org: 'compactai', params: 1000000, paramsDisplay: '1M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: 'Early Glint prototype.', links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.2' } },
// { name: 'Glint-0.3', org: 'compactai', params: 1000000, paramsDisplay: '1M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: 'Third Glint prototype.', links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.3' } },
// { name: 'Glint-0.1', org: 'compactai', params: 1000000, paramsDisplay: '1M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'released', description: 'First Glint prototype.', links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.1' } },
// { name: 'StorySupra-10M', org: 'supralabs', params: 12600000, paramsDisplay: '12.6M', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'tbd', description: 'Story-focused model from SupraLabs.', links: { card: 'https://huggingface.co/SupraLabs/StorySupra-10M' } },
// { name: 'DistillSupra-0.2M', org: 'supralabs', params: 289000, paramsDisplay: '289K', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'tbd', description: 'Distilled variant of the Supra series.', links: { card: 'https://huggingface.co/SupraLabs/DistillSupra-0.2M' } },
// { name: 'MicroSupra-1k', org: 'supralabs', params: 1000, paramsDisplay: '1K', blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'tbd', description: 'Extreme micro-scale experiment at 1K parameters.', links: { card: 'https://huggingface.co/SupraLabs/MicroSupra-1k' } },
// { name: 'TrueMath', org: 'compactai', params: null, paramsDisplay: '1-layer',blimp: null, arc: null, hellaswag: null, piqa: null, arcChall: null, arithmark2: null, status: 'tbd', description: 'Single-layer mathematical reasoning experiment.', links: { card: 'https://huggingface.co/CompactAI-O/TrueMath' } },
];
// ═══════════════════════════════════════════════════════════════
// ═══ STATE
// ═══════════════════════════════════════════════════════════════
// Score keys read directly off each model entry. The composite 'avg' is not
// listed because it is derived on demand by getAvg().
const BENCHMARKS = ['arc', 'hellaswag', 'piqa', 'arcChall', 'arithmark2'];
// Metrics offered by the benchmark selector and the highlight cards.
// `key` is the lookup key, `label` the short text, `fullLabel` the long text
// returned by getMetricLabel(metric, true).
const METRICS = [
{ key: 'avg', label: 'Avg', fullLabel: 'Average Score' },
{ key: 'hellaswag', label: 'HellaSwag', fullLabel: 'HellaSwag' },
{ key: 'arc', label: 'ARC-Easy', fullLabel: 'ARC-Easy' },
{ key: 'arcChall', label: 'ARC-Challenge', fullLabel: 'ARC-Challenge' },
{ key: 'piqa', label: 'PIQA', fullLabel: 'PIQA' },
{ key: 'arithmark2', label: 'ArithMark-2', fullLabel: 'ArithMark-2' },
];
// Reference score (in %) per metric key, drawn as the dotted "Random" line on
// the scatter chart. The 'avg' value equals the mean of 25, 25, 50 and 25,
// i.e. the four getAvg() components with the two ARC sets merged into one.
const RANDOM_BASELINES = {
avg: 31.25,
hellaswag: 25,
arc: 25,
arcChall: 25,
piqa: 50,
arithmark2: 25
};
// ─── Mutable UI state shared by the render functions ───
// Column key the table is sorted by, and the sort direction.
let sortCol = 'avg';
let sortAsc = false;
// Active size filter: 'all', '<100M', '<50M' or '<10M' (see renderFilters).
let activeFilter = 'all';
// Metric key currently plotted by the bar and scatter charts.
let activeBenchmark = 'avg';
// Live Chart.js instances, keyed 'bar' and 'scatter', kept so they can be
// destroyed before the canvas is redrawn.
let chartInstances = {};
// `type` value of the marker entry in MODELS; entries listed before it form
// the baseline used for organization rank movement.
const ORG_MOVEMENT_CUTOFF_TYPE = 'orgMovementCutoff';
/**
 * Composite score of a model entry.
 *
 * Components are HellaSwag, ARC, PIQA and ArithMark-2. ARC counts once: the
 * mean of ARC-Easy and ARC-Challenge when both exist, otherwise whichever one
 * is present. Missing (null/undefined) components are left out.
 *
 * @param {object} m model entry
 * @returns {number|null} mean of the available components, or null when
 *   fewer than two components are available
 */
function getAvg(m) {
  const present = (score) => score !== null && score !== undefined;

  let arcScore;
  if (present(m.arc) && present(m.arcChall)) {
    arcScore = (m.arc + m.arcChall) / 2;
  } else if (present(m.arc)) {
    arcScore = m.arc;
  } else if (present(m.arcChall)) {
    arcScore = m.arcChall;
  }

  const scores = [m.hellaswag, arcScore, m.piqa, m.arithmark2].filter(present);
  if (scores.length < 2) return null;

  let total = 0;
  for (const score of scores) total += score;
  return total / scores.length;
}
/**
 * Value used to order a model when the table is sorted by `col`.
 *
 * 'avg' uses the composite score, 'params' the raw parameter count (0 when
 * missing), 'rank' the negated composite score, and any other column the
 * field of that name. Missing scores are replaced by -Infinity.
 *
 * @param {object} m model entry
 * @param {string} col column key
 * @returns {*} comparable sort value
 */
function getSortVal(m, col) {
  switch (col) {
    case 'avg':
      return getMetricValue(m, 'avg') ?? -Infinity;
    case 'params':
      return m.params ?? 0;
    case 'rank': {
      const avg = getMetricValue(m, 'avg') ?? -Infinity;
      return -avg;
    }
    default:
      return m[col] ?? -Infinity;
  }
}
/**
 * Reads one metric from a model entry.
 *
 * @param {object} m model entry
 * @param {string} [metric=activeBenchmark] metric key; 'avg' is computed via
 *   getAvg, every other key is read straight off the entry
 * @returns {number|null|undefined} the score, or null/undefined when absent
 */
function getMetricValue(m, metric = activeBenchmark) {
  if (metric === 'avg') {
    return getAvg(m);
  }
  return m[metric];
}
/**
 * Display label for a metric key.
 *
 * @param {string} [metric=activeBenchmark] metric key to look up in METRICS
 * @param {boolean} [full=false] return `fullLabel` instead of `label`
 * @returns {string} the label, or the key itself when METRICS has no entry
 */
function getMetricLabel(metric = activeBenchmark, full = false) {
  for (const entry of METRICS) {
    if (entry.key === metric) {
      return full ? entry.fullLabel : entry.label;
    }
  }
  return metric;
}
/**
 * Drops marker entries (anything with a truthy `type`) and keeps real models.
 *
 * @param {object[]} [models=MODELS] entries to scan
 * @returns {object[]} new array holding only entries without a `type`
 */
function getModelEntries(models = MODELS) {
  return models.reduce((kept, entry) => {
    if (!entry.type) kept.push(entry);
    return kept;
  }, []);
}
/**
 * Model entries that pass the active size filter.
 *
 * For '<10M', '<50M' and '<100M' only models with a truthy `params` strictly
 * below the limit are kept; any other filter value keeps every model.
 *
 * @param {object[]} [models=MODELS] entries to filter (markers are removed)
 * @returns {object[]} new array of matching models
 */
function getFilteredModels(models = MODELS) {
  const ceilings = new Map([
    ['<10M', 10000000],
    ['<50M', 50000000],
    ['<100M', 100000000]
  ]);
  const entries = getModelEntries(models);
  if (!ceilings.has(activeFilter)) return entries;
  const ceiling = ceilings.get(activeFilter);
  return entries.filter(m => m.params && m.params < ceiling);
}
/**
 * Models used as the "before" snapshot for organization rank movement:
 * everything listed in MODELS ahead of the cutoff marker, or all of MODELS
 * when no marker exists, narrowed by the active size filter.
 *
 * @returns {object[]} filtered baseline models
 */
function getOrgMovementBaselineModels() {
  const markerAt = MODELS.findIndex(entry => entry.type === ORG_MOVEMENT_CUTOFF_TYPE);
  if (markerAt === -1) {
    return getFilteredModels(MODELS);
  }
  return getFilteredModels(MODELS.slice(0, markerAt));
}
/**
 * Filtered models that have a value for the active benchmark.
 *
 * @returns {object[]} models whose active-benchmark score is neither null
 *   nor undefined
 */
function getChartModels() {
  return getFilteredModels().filter(m => {
    const score = getMetricValue(m, activeBenchmark);
    return score !== null && score !== undefined;
  });
}
/**
 * Copies of the given models with their composite score attached as `avg`,
 * leaving out models for which no composite score can be computed.
 *
 * @param {object[]} [models=getFilteredModels()] models to score
 * @returns {object[]} shallow copies carrying an `avg` field
 */
function getAvgModels(models = getFilteredModels()) {
  const scored = [];
  models.forEach(m => {
    const avg = getAvg(m);
    if (avg !== null && avg !== undefined) {
      scored.push({ ...m, avg });
    }
  });
  return scored;
}
/**
 * Filtered models ordered by the current sort column and direction.
 *
 * The comparator returns 0 for equal (or incomparable) sort values so that
 * ties keep their original relative order. Previously it always returned
 * +1/-1, which is an inconsistent comparator whenever two models share a
 * value (e.g. several models with no score, all mapped to -Infinity).
 *
 * @returns {object[]} new sorted array; getFilteredModels() hands back a
 *   fresh array, so sorting it in place leaves MODELS untouched
 */
function getSortedModels() {
  const direction = sortAsc ? 1 : -1;
  const arr = getFilteredModels();
  arr.sort((a, b) => {
    const av = getSortVal(a, sortCol);
    const bv = getSortVal(b, sortCol);
    if (av < bv) return -direction;
    if (av > bv) return direction;
    return 0;
  });
  return arr;
}
/**
 * Formats a number with two decimals and an explicit '+' for values >= 0.
 *
 * @param {number|null|undefined} value number to format
 * @returns {string} e.g. '+1.23' or '-0.50'; 'N/A' for null/undefined
 */
function formatSigned(value) {
  // Loose equality with null matches exactly null and undefined.
  if (value == null) return 'N/A';
  const sign = value >= 0 ? '+' : '';
  return sign + value.toFixed(2);
}
/**
 * Highest-scoring filtered model for a metric.
 *
 * @param {string} metric metric key ('avg' or a benchmark key)
 * @returns {object|null} copy of the winning model with its value attached
 *   as `score`, or null when no filtered model has a value for the metric
 */
function topByMetric(metric) {
  const ranked = [];
  for (const m of getFilteredModels()) {
    const score = getMetricValue(m, metric);
    if (score !== null && score !== undefined) {
      ranked.push({ ...m, score });
    }
  }
  ranked.sort((a, b) => b.score - a.score);
  return ranked.length ? ranked[0] : null;
}
/**
 * Population standard deviation (divides by N, not N - 1).
 *
 * @param {number[]} values samples
 * @returns {number} standard deviation; 0 for an empty array
 */
function getStdDev(values) {
  const count = values.length;
  if (count === 0) return 0;

  let total = 0;
  for (const v of values) total += v;
  const mean = total / count;

  let squaredError = 0;
  for (const v of values) squaredError += Math.pow(v - mean, 2);

  return Math.sqrt(squaredError / count);
}
/**
 * Builds the organization leaderboard rows.
 *
 * A line is fitted through (log10(params), avg) over every model that has
 * both a composite score and a parameter count. Each model's residual from
 * that line is divided by the residual standard deviation (or by 1 when that
 * deviation is 0) to give `fitZ`; models are then grouped by `org`.
 *
 * @param {object[]} [modelsForRows=getFilteredModels()] models to consider
 * @returns {{residualStd: (number|null), rows: object[]}} rows sorted by
 *   descending mean fitZ; `{ residualStd: null, rows: [] }` when fewer than
 *   two models qualify
 */
function getOrgFitRows(modelsForRows = getFilteredModels()) {
  // Sequential left-to-right mean of one numeric field.
  const average = (items, pick) =>
    items.reduce((sum, item) => sum + pick(item), 0) / items.length;

  const sized = getAvgModels(modelsForRows)
    .filter(m => m.params)
    .map(m => ({ ...m, logParams: Math.log10(m.params) }));
  if (sized.length < 2) {
    return { residualStd: null, rows: [] };
  }

  const fit = linearRegression(sized.map(m => [m.logParams, m.avg]));
  const fitted = sized.map(m => {
    const expected = fit.slope * m.logParams + fit.intercept;
    return { ...m, expected, residual: m.avg - expected };
  });

  const residualStd = getStdDev(fitted.map(m => m.residual));
  // Fall back to 1 so a perfect fit does not divide by zero.
  const scale = residualStd || 1;

  const byOrg = {};
  for (const m of fitted) {
    if (!byOrg[m.org]) byOrg[m.org] = [];
    byOrg[m.org].push({ ...m, fitZ: m.residual / scale });
  }

  const rows = [];
  for (const [orgKey, items] of Object.entries(byOrg)) {
    const leader = items.slice().sort((a, b) => b.fitZ - a.fitZ)[0];
    rows.push({
      orgKey,
      org: ORGS[orgKey],
      count: items.length,
      avgScore: average(items, m => m.avg),
      meanFitZ: average(items, m => m.fitZ),
      topModel: leader.name,
      topScore: leader.avg,
      topFitZ: leader.fitZ
    });
  }
  rows.sort((a, b) => b.meanFitZ - a.meanFitZ);

  return { residualStd, rows };
}
/**
 * Compares current organization ranks with a baseline ranking.
 *
 * Rank is the 1-based position in each array. An organization missing from
 * the baseline is marked 'new'; otherwise the class is 'up', 'down' or
 * 'same' and the label shows an arrow plus the number of places moved
 * ('-' when unchanged).
 *
 * @param {object[]} rows current rows, best first (each with `orgKey`)
 * @param {object[]} baselineRows baseline rows, best first
 * @returns {Object<string, {className: string, label: string}>} movement
 *   descriptor per orgKey
 */
function getOrgRankMovement(rows, baselineRows) {
  const rankBefore = new Map(baselineRows.map((row, idx) => [row.orgKey, idx + 1]));
  const movement = {};

  rows.forEach((row, idx) => {
    const before = rankBefore.get(row.orgKey);
    if (!before) {
      movement[row.orgKey] = { className: 'new', label: 'new' };
      return;
    }

    const placesGained = before - (idx + 1);
    let className = 'same';
    let label = '-';
    if (placesGained > 0) {
      className = 'up';
      label = `${'↑ '}${Math.abs(placesGained)}`;
    } else if (placesGained < 0) {
      className = 'down';
      label = `${'↓ '}${Math.abs(placesGained)}`;
    }
    movement[row.orgKey] = { className, label };
  });

  return movement;
}
// ─── Green color scale: muted gray → #4ade80 ───
/**
 * Inline CSS colour for a score, interpolated from muted gray
 * rgb(107,107,110) at `min` to green rgb(74,222,128) at `max`.
 *
 * The position is clamped to [0, 1] BEFORE the 1.5 exponent is applied:
 * raising a negative fraction to a non-integer power yields NaN, which
 * previously produced `rgb(NaN,NaN,NaN)` for values below `min`.
 *
 * @param {number|null|undefined} value score to colour
 * @param {number} min score mapped to gray
 * @param {number} max score mapped to green
 * @returns {string} `color: rgb(r,g,b)` declaration, or '' when the value is
 *   missing or the range is empty (min === max)
 */
function scoreColor(value, min, max) {
  if (value === null || value === undefined || min === max) return '';
  const fraction = Math.max(0, Math.min(1, (value - min) / (max - min)));
  // Exponent > 1 keeps mid-range scores closer to gray.
  const pct = Math.pow(fraction, 1.5);
  const r = Math.round(107 + (74 - 107) * pct);
  const g = Math.round(107 + (222 - 107) * pct);
  const b = Math.round(110 + (128 - 110) * pct);
  return `color: rgb(${r},${g},${b})`;
}
// ═══════════════════════════════════════════════════════════════
// ═══ RENDER: FILTER BAR
// ═══════════════════════════════════════════════════════════════
/**
 * Renders the size-filter buttons into #filter-bar, marking the active one.
 *
 * Filter keys such as '<100M' start with '<'. The visible label is therefore
 * written as `&lt;…` so the markup assigned to innerHTML is valid HTML; the
 * raw key is still what gets passed to setFilter().
 */
function renderFilters() {
  const bar = document.getElementById('filter-bar');
  const filters = ['all', '<100M', '<50M', '<10M'];
  bar.innerHTML = filters.map(f => {
    const label = f === 'all' ? 'All' : f.replace(/</g, '&lt;');
    return `
<button class="filter-btn ${activeFilter === f ? 'active' : ''}" onclick="setFilter('${f}')">${label}</button>
`;
  }).join('');
}
/**
 * Renders one button per METRICS entry into #benchmark-filter-bar, marking
 * the active benchmark. Does nothing when the container is absent.
 */
function renderBenchmarkFilters() {
  const benchmarkBar = document.getElementById('benchmark-filter-bar');
  if (!benchmarkBar) return;

  let markup = '';
  for (const metric of METRICS) {
    const activeClass = activeBenchmark === metric.key ? 'active' : '';
    markup += `
<button class="filter-btn ${activeClass}" onclick="setBenchmark('${metric.key}')">${metric.label}</button>
`;
  }
  benchmarkBar.innerHTML = markup;
}
/**
 * Renders the insight cards into #insight-grid: two count cards (models and
 * organizations in the current filter) followed by one "Best <metric>" card
 * per METRICS entry. Does nothing when the container is absent.
 *
 * The filter description is injected as HTML, so the leading '<' of size
 * filters such as '<10M' is written as `&lt;` to keep the markup valid.
 */
function renderHighlights() {
  const grid = document.getElementById('insight-grid');
  if (!grid) return;
  const filtered = getFilteredModels();
  // Entries without an org are not counted as an organization.
  const orgCount = new Set(filtered.map(m => m.org).filter(Boolean)).size;
  const filterMeta = activeFilter === 'all'
    ? 'All parameter sizes'
    : `${activeFilter.replace(/</g, '&lt;')} parameters`;
  const statCards = [
    {
      label: 'Models tracked',
      value: filtered.length,
      meta: filterMeta,
      border: 'var(--accent)'
    },
    {
      label: 'Orgs represented',
      value: orgCount,
      meta: filterMeta,
      border: 'var(--accent-muted)'
    }
  ];
  const metricCards = METRICS.map(metric => {
    const winner = topByMetric(metric.key);
    if (!winner) {
      // No filtered model has a value for this metric.
      return {
        label: `Best ${metric.label}`,
        value: 'No data',
        score: '',
        meta: filterMeta,
        border: 'var(--border-strong)'
      };
    }
    const org = ORGS[winner.org];
    return {
      label: `Best ${metric.label}`,
      value: winner.name,
      href: winner.links?.card,
      score: `${winner.score.toFixed(2)}%`,
      meta: `<a href="${org.url}" target="_blank">${org.name}</a> &middot; ${winner.paramsDisplay}`,
      border: org.chartBorder
    };
  });
  grid.innerHTML = [
    ...statCards.map(card => `
<div class="insight-card" style="border-top-color:${card.border}">
<span class="insight-label">${card.label}</span>
<span class="insight-value stat">${card.value}</span>
<span class="insight-meta">${card.meta}</span>
</div>
`),
    ...metricCards.map(card => `
<div class="insight-card" style="border-top-color:${card.border}">
<span class="insight-label">${card.label}</span>
${card.href ? `<a class="insight-value" href="${card.href}" target="_blank">${card.value}</a>` : `<span class="insight-value">${card.value}</span>`}
${card.score ? `<span class="insight-score">${card.score}</span>` : ''}
<span class="insight-meta">${card.meta}</span>
</div>
`)
  ].join('');
}
/**
 * Activates a size filter and redraws every view that depends on it.
 *
 * @param {string} f filter key ('all', '<100M', '<50M' or '<10M')
 */
function setFilter(f) {
  activeFilter = f;
  const dependents = [
    renderFilters,
    renderHighlights,
    renderTable,
    renderBarChart,
    renderScatter,
    renderOrgLeaderboard
  ];
  for (const render of dependents) render();
}
/**
 * Activates a benchmark and redraws the benchmark buttons and both charts.
 *
 * @param {string} metric metric key from METRICS
 */
function setBenchmark(metric) {
  activeBenchmark = metric;
  const dependents = [renderBenchmarkFilters, renderBarChart, renderScatter];
  for (const render of dependents) render();
}
// ═══════════════════════════════════════════════════════════════
// ═══ RENDER: TABLE
// ═══════════════════════════════════════════════════════════════
/**
 * Rebuilds the leaderboard table body (#leaderboard-body) from the sorted,
 * filtered models and updates the sort indicator in the table header.
 *
 * Each score cell is coloured on a scale spanning the column's minimum and
 * maximum within the current filter; the column maximum gets the 'best'
 * class. Missing scores render as an em dash.
 *
 * The placeholder dash, the separator dot and the sort arrows were stored as
 * mis-encoded text (UTF-8 bytes decoded with the wrong code page); they are
 * written here as entities / \u escapes so they survive re-encoding.
 */
function renderTable() {
  const tbody = document.getElementById('leaderboard-body');
  const sorted = getSortedModels();
  const allModels = getFilteredModels();
  // Best values per column (for highlighting + color scaling)
  const getBest = (key) => {
    const vals = allModels.map(m => m[key]).filter(v => v !== null && v !== undefined);
    return vals.length ? Math.max(...vals) : null;
  };
  const getMin = (key) => {
    const vals = allModels.map(m => m[key]).filter(v => v !== null && v !== undefined);
    return vals.length ? Math.min(...vals) : null;
  };
  const best = {};
  const mins = {};
  BENCHMARKS.forEach(k => { best[k] = getBest(k); mins[k] = getMin(k); });
  const avgVals = allModels.map(getAvg).filter(v => v !== null);
  best.avg = avgVals.length ? Math.max(...avgVals) : null;
  mins.avg = avgVals.length ? Math.min(...avgVals) : null;
  const fmtCell = (val, key) => {
    if (val === null || val === undefined) return '<span class="td-score na">&mdash;</span>';
    // Tolerance comparison: scores are floats.
    const isBest = best[key] !== null && Math.abs(val - best[key]) < 0.001;
    const cls = isBest ? 'best' : '';
    return `<span class="td-score ${cls}" style="${scoreColor(val, mins[key], best[key])}">${val.toFixed(2)}%</span>`;
  };
  tbody.innerHTML = sorted.map((m, i) => {
    const org = ORGS[m.org];
    // Rank is the position in the current sort order, not a stored value.
    const rank = i + 1;
    const avg = getAvg(m);
    return `
<tr>
<td class="td-rank">${rank}</td>
<td class="td-model"><a href="${m.links.card}" target="_blank">${m.name}</a><span class="model-org"><a href="${org.url}" target="_blank">${org.name}</a> &middot; ${m.paramsDisplay}</span></td>
<td class="td-params">${m.paramsDisplay}</td>
<td class="td-score">${avg !== null ? `<span class="${Math.abs(avg - best.avg) < 0.001 ? 'best' : ''}" style="${scoreColor(avg, mins.avg, best.avg)}">${avg.toFixed(2)}%</span>` : '<span class="td-score na">&mdash;</span>'}</td>
<td class="td-score">${fmtCell(m.hellaswag, 'hellaswag')}</td>
<td class="td-score">${fmtCell(m.arc, 'arc')}</td>
<td class="td-score">${fmtCell(m.arcChall, 'arcChall')}</td>
<td class="td-score">${fmtCell(m.piqa, 'piqa')}</td>
<td class="td-score">${fmtCell(m.arithmark2, 'arithmark2')}</td>
</tr>
`;
  }).join('');
  // Sort indicators
  document.querySelectorAll('thead th').forEach(th => {
    th.classList.remove('sorted');
    const ind = th.querySelector('.sort-indicator');
    if (ind) ind.textContent = '';
  });
  // Header cell index for each sortable column key.
  const colMap = { rank: 0, name: 1, params: 2, avg: 3, hellaswag: 4, arc: 5, arcChall: 6, piqa: 7, arithmark2: 8 };
  const thIdx = colMap[sortCol];
  if (thIdx !== undefined) {
    const th = document.querySelectorAll('thead th')[thIdx];
    th.classList.add('sorted');
    const ind = th.querySelector('.sort-indicator');
    // \u25B2 = up-pointing triangle, \u25BC = down-pointing triangle.
    if (ind) ind.textContent = sortAsc ? '\u25B2' : '\u25BC';
  }
}
/**
 * Sorts the table by a column. Choosing the current column again flips the
 * direction; choosing a different column starts in descending order.
 *
 * @param {string} col column key
 */
function sortTable(col) {
  const sameColumn = sortCol === col;
  sortAsc = sameColumn ? !sortAsc : false;
  sortCol = col;
  renderTable();
}
// ═══════════════════════════════════════════════════════════════
// ═══ RENDER: BAR CHART + ORG LEADERBOARD + LEGEND + SCATTER PLOT
// ═══════════════════════════════════════════════════════════════
/**
 * Draws the horizontal bar chart of the top (at most 12) filtered models for
 * the active benchmark on #barChart, replacing any previous chart instance.
 * Does nothing when the canvas is absent. When no model has data, only the
 * heading is updated and no chart is created.
 */
function renderBarChart() {
  const canvas = document.getElementById('barChart');
  if (!canvas) return;
  const ctx = canvas.getContext('2d');
  const stale = chartInstances['bar'];
  if (stale) stale.destroy();

  const metricLabel = getMetricLabel(activeBenchmark, true);
  const caption = document.getElementById('score-chart-sub');
  if (caption) {
    caption.textContent = `Top ${metricLabel.toLowerCase()} scores for the active size filter.`;
  }

  const hasScore = (entry) => entry.score !== null && entry.score !== undefined;
  const models = getChartModels()
    .map(m => ({ ...m, score: getMetricValue(m, activeBenchmark) }))
    .filter(hasScore)
    .sort((a, b) => b.score - a.score)
    .slice(0, 12);

  const heading = canvas.parentElement.querySelector('h3');
  if (models.length === 0) {
    heading.textContent = `Top ${metricLabel} Scores - no models with data`;
    return;
  }
  heading.textContent = `Top ${models.length} ${metricLabel} Scores`;

  // Fresh object per use so no option object is shared between axes.
  const monoFont = () => ({ family: "'Space Mono', monospace", size: 10 });
  const axisColor = '#454548';

  const fills = [];
  const outlines = [];
  for (const m of models) {
    const org = ORGS[m.org];
    fills.push(org.chartColor.replace('0.70', '0.82'));
    outlines.push(org.chartBorder);
  }

  const tooltip = {
    backgroundColor: 'rgba(20,20,22,0.96)',
    borderColor: 'rgba(194,182,255,0.25)',
    borderWidth: 1,
    titleColor: '#e8e8e8',
    bodyColor: '#6b6b6e',
    padding: 10,
    displayColors: false,
    callbacks: {
      label: (item) => {
        const model = models[item.dataIndex];
        return [`${metricLabel}: ${item.raw.toFixed(2)}%`, `Params: ${model.paramsDisplay}`];
      }
    }
  };

  const scoreAxis = {
    min: 0,
    suggestedMax: Math.max(...models.map(m => m.score)) * 1.08,
    title: {
      display: true,
      text: `${metricLabel} (%)`,
      color: axisColor,
      font: monoFont()
    },
    grid: { color: 'rgba(255,255,255,0.03)' },
    ticks: { color: axisColor, font: monoFont(), callback: v => v + '%', maxTicksLimit: 8 },
    border: { display: false }
  };

  const nameAxis = {
    grid: { display: false },
    ticks: { color: '#6b6b6e', font: { family: "'DM Sans', system-ui, sans-serif", size: 11 } },
    border: { display: false }
  };

  chartInstances['bar'] = new Chart(ctx, {
    type: 'bar',
    data: {
      labels: models.map(m => m.name),
      datasets: [{
        label: metricLabel,
        data: models.map(m => m.score),
        backgroundColor: fills,
        borderColor: outlines,
        borderWidth: 1.5,
        borderRadius: 4,
        borderSkipped: false
      }]
    },
    options: {
      // Horizontal bars: categories on y, scores on x.
      indexAxis: 'y',
      responsive: true,
      maintainAspectRatio: true,
      animation: { duration: 0 },
      plugins: {
        legend: { display: false },
        tooltip
      },
      scales: { x: scoreAxis, y: nameAxis }
    }
  });
}
/**
 * Renders the organization leaderboard into #org-leaderboard-body and its
 * explanatory subtitle into #org-leaderboard-sub. Rank movement is computed
 * against the rows produced from the baseline models. Does nothing when the
 * table body is absent; shows a single placeholder row when there are no
 * organization rows.
 */
function renderOrgLeaderboard() {
  const tbody = document.getElementById('org-leaderboard-body');
  if (!tbody) return;

  const current = getOrgFitRows();
  const baseline = getOrgFitRows(getOrgMovementBaselineModels());
  const residualStd = current.residualStd;
  const rows = current.rows;
  const movement = getOrgRankMovement(rows, baseline.rows);

  const sub = document.getElementById('org-leaderboard-sub');
  const filterText = activeFilter === 'all' ? 'all model sizes' : `${activeFilter} models`;
  if (sub) {
    if (residualStd === null) {
      sub.textContent = `Need at least two models with Avg and parameter counts for ${filterText}.`;
    } else {
      sub.textContent = `Average standard deviations above or below the Avg-vs-size fit line across ${filterText}. Movement compares against rankings above the cutoff marker. Residual std dev: ${residualStd.toFixed(2)} pts.`;
    }
  }

  if (rows.length === 0) {
    tbody.innerHTML = `
<tr>
<td class="td-rank">-</td>
<td class="td-model">No organization data</td>
<td class="td-params">-</td>
<td class="td-score na">N/A</td>
<td class="td-score na">N/A</td>
<td class="td-model"><span class="model-org">Try a broader size filter.</span></td>
</tr>
`;
    return;
  }

  const fragments = [];
  rows.forEach((row, i) => {
    // Organizations at or above the fit line are highlighted.
    const zClass = row.meanFitZ >= 0 ? 'best' : '';
    const move = movement[row.orgKey] || { className: 'same', label: '-' };
    fragments.push(`
<tr>
<td class="td-rank">${i + 1}</td>
<td class="td-model"><span class="org-title-line"><a href="${row.org.url}" target="_blank">${row.org.name}</a><span class="rank-move ${move.className}">${move.label}</span></span><span class="model-org">Best residual: ${row.topModel}</span></td>
<td class="td-params">${row.count}</td>
<td class="td-score"><span class="${zClass}">${formatSigned(row.meanFitZ)} std</span></td>
<td class="td-score">${row.avgScore.toFixed(2)}%</td>
<td class="td-model">${row.topModel}<span class="model-org">${row.topScore.toFixed(2)}% avg &middot; ${formatSigned(row.topFitZ)} std</span></td>
</tr>
`);
  });
  tbody.innerHTML = fragments.join('');
}
/**
 * Renders one legend entry (colour dot + name) per organization in ORGS
 * into #legend-bar.
 */
function renderLegend() {
  const bar = document.getElementById('legend-bar');
  const items = [];
  for (const org of Object.values(ORGS)) {
    items.push(`
<div class="legend-item"><span class="legend-dot" style="background:${org.chartBorder}"></span>${org.name}</div>
`);
  }
  bar.innerHTML = items.join('');
}
/**
 * Ordinary least-squares fit of y = slope * x + intercept.
 *
 * @param {Array<[number, number]>} points [x, y] pairs
 * @returns {{slope: number, intercept: number}} fitted line;
 *   `{ slope: 0, intercept: 0 }` for fewer than two points, and a flat line
 *   through the mean of y when all x values are (numerically) identical
 */
function linearRegression(points) {
  const n = points.length;
  if (n < 2) return { slope: 0, intercept: 0 };

  const sums = points.reduce((acc, [x, y]) => {
    acc.x += x;
    acc.y += y;
    acc.xy += x * y;
    acc.xx += x * x;
    return acc;
  }, { x: 0, y: 0, xy: 0, xx: 0 });

  const denom = n * sums.xx - sums.x * sums.x;
  // A (near-)zero denominator means there is no spread in x.
  if (Math.abs(denom) < 1e-10) {
    return { slope: 0, intercept: sums.y / n };
  }

  const slope = (n * sums.xy - sums.x * sums.y) / denom;
  const intercept = (sums.y - slope * sums.x) / n;
  return { slope, intercept };
}
/**
 * Draws the score-vs-log10(parameters) scatter chart on #scatterChart for
 * the active benchmark, together with the least-squares regression line
 * (area above it shaded) and the dotted random-baseline line. Any previous
 * chart instance is destroyed first. Does nothing when the canvas is absent;
 * with fewer than two plottable models only the heading is updated.
 *
 * The y-axis minimum now also accounts for the lowest plotted score and the
 * random baseline. It used to be derived solely from the regression line's
 * left end, so low-scoring models and the baseline line could fall below
 * the axis and be clipped.
 */
function renderScatter() {
  const canvas = document.getElementById('scatterChart');
  if (!canvas) return;
  const ctx = canvas.getContext('2d');
  if (chartInstances['scatter']) chartInstances['scatter'].destroy();
  const metricLabel = getMetricLabel(activeBenchmark, true);
  const sub = document.getElementById('efficiency-sub');
  if (sub) sub.textContent = `${metricLabel} vs parameter count (log scale). Shaded zone = above regression line. Dotted line = random baseline.`;
  // Models without a parameter count cannot be placed on the x axis.
  const models = getChartModels()
    .filter(m => m.params && getMetricValue(m, activeBenchmark) !== null && getMetricValue(m, activeBenchmark) !== undefined)
    .map(m => ({
      x: Math.log10(m.params),
      y: getMetricValue(m, activeBenchmark),
      name: m.name,
      org: m.org,
      params: m.paramsDisplay
    }));
  if (models.length < 2) {
    canvas.parentElement.querySelector('h3').textContent = `${metricLabel} vs Log Parameters - need 2+ models with data`;
    return;
  }
  canvas.parentElement.querySelector('h3').textContent = `${metricLabel} vs Log Parameters`;
  // Regression
  const points = models.map(m => [m.x, m.y]);
  const { slope, intercept } = linearRegression(points);
  const residualStd = getStdDev(models.map(m => m.y - (slope * m.x + intercept)));
  // Residual in units of the residual std dev (shown in the tooltip).
  models.forEach(m => {
    const residual = m.y - (slope * m.x + intercept);
    m.fitZ = residual / (residualStd || 1);
  });
  // X range for regression line
  const xMin = Math.min(...models.map(m => m.x));
  const xMax = Math.max(...models.map(m => m.x));
  const pad = (xMax - xMin) * 0.1;
  const xLow = xMin - pad;
  const xHigh = xMax + pad;
  const yAtLow = slope * xLow + intercept;
  const yAtHigh = slope * xHigh + intercept;
  const randomBaseline = RANDOM_BASELINES[activeBenchmark] ?? null;
  // Every y value that must stay visible: the points and, if any, the baseline.
  const yValues = models.map(m => m.y);
  const yVisible = randomBaseline === null ? yValues : [...yValues, randomBaseline];
  const yMax = Math.max(...yVisible) * 1.08;
  const yLowest = Math.min(...yVisible);
  // Leave room below the regression line's left end, but never cut off the
  // lowest point or the baseline; the axis does not go below 0.
  const yMin = Math.max(0, Math.min(
    yAtLow - (yMax - yAtLow) * 0.3,
    yLowest - (yMax - yLowest) * 0.05
  ));
  renderLegend();
  chartInstances['scatter'] = new Chart(ctx, {
    type: 'scatter',
    data: {
      datasets: [
        // Regression line — fill upward to top of chart (upper triangular zone)
        {
          type: 'line',
          label: 'Regression',
          data: [{ x: xLow, y: yAtLow }, { x: xHigh, y: yAtHigh }],
          borderColor: 'rgba(194, 182, 255, 0.45)',
          borderWidth: 1.5,
          borderDash: [5, 4],
          pointRadius: 0,
          fill: 'end',
          backgroundColor: 'rgba(194, 182, 255, 0.10)',
          order: 0
        },
        // Random baseline for the active benchmark
        {
          type: 'line',
          label: 'Random',
          data: randomBaseline === null ? [] : [{ x: xLow, y: randomBaseline }, { x: xHigh, y: randomBaseline }],
          borderColor: 'rgba(107, 107, 110, 0.75)',
          borderWidth: 1.25,
          borderDash: [2, 4],
          pointRadius: 0,
          fill: false,
          order: 1
        },
        // Scatter points on top
        {
          label: 'Models',
          data: models,
          backgroundColor: models.map(m => ORGS[m.org].chartColor.replace('0.70', '0.90')),
          borderColor: models.map(m => ORGS[m.org].chartBorder),
          borderWidth: 1.5,
          pointRadius: 7,
          pointHoverRadius: 10,
          order: 2
        }
      ]
    },
    options: {
      responsive: true,
      maintainAspectRatio: true,
      animation: { duration: 0 },
      plugins: {
        legend: { display: false },
        tooltip: {
          backgroundColor: 'rgba(20,20,22,0.96)',
          borderColor: 'rgba(194,182,255,0.25)',
          borderWidth: 1,
          titleColor: '#e8e8e8',
          bodyColor: '#6b6b6e',
          padding: 10,
          displayColors: false,
          callbacks: {
            title: (items) => items[0]?.raw?.name || '',
            label: (item) => {
              const d = item.raw;
              return [
                `Params: ${d.params}`,
                `${metricLabel}: ${d.y?.toFixed(2)}%`,
                `Fit residual: ${formatSigned(d.fitZ)} std`
              ];
            }
          }
        }
      },
      scales: {
        x: {
          type: 'linear',
          min: xLow,
          max: xHigh,
          title: {
            display: true,
            text: 'Log₁₀(Parameters)',
            color: '#454548',
            font: { family: "'Space Mono', monospace", size: 10 }
          },
          grid: { color: 'rgba(255,255,255,0.03)' },
          ticks: { color: '#454548', font: { family: "'Space Mono', monospace", size: 10 }, maxTicksLimit: 8 },
          border: { display: false }
        },
        y: {
          min: yMin,
          max: yMax,
          title: {
            display: true,
            text: `${metricLabel} (%)`,
            color: '#454548',
            font: { family: "'Space Mono', monospace", size: 10 }
          },
          grid: { color: 'rgba(255,255,255,0.03)' },
          ticks: { color: '#454548', font: { family: "'Space Mono', monospace", size: 10 }, callback: v => v.toFixed(0) + '%', maxTicksLimit: 8 },
          border: { display: false }
        }
      }
    }
  });
}
// ═══════════════════════════════════════════════════════════════
// ═══ INIT
// ═══════════════════════════════════════════════════════════════
// Initial paint: once the DOM is parsed, run every renderer once, in order.
window.addEventListener('DOMContentLoaded', () => {
  const initialRenderers = [
    renderFilters,
    renderBenchmarkFilters,
    renderHighlights,
    renderTable,
    renderBarChart,
    renderLegend,
    renderScatter,
    renderOrgLeaderboard
  ];
  for (const render of initialRenderers) render();
});
</script>
</body>
</html>