| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>Open SLM Leaderboard</title> |
| <link rel="preconnect" href="https://fonts.googleapis.com"> |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,400&family=Space+Mono:wght@400;700&display=swap" rel="stylesheet"> |
| <script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script> |
| <style> |
| :root { |
| --bg: #0a0a0b; |
| --bg-alt: #111113; |
| --surface: #141416; |
| --surface-hover: #1a1a1d; |
| --border: #232326; |
| --border-subtle: #19191c; |
| --border-strong: #2e2e32; |
| --text: #e8e8e8; |
| --text-secondary: #6b6b6e; |
| --text-muted: #454548; |
| --accent: #c2b6ff; |
| --accent-muted: #7a6fb0; |
| --accent-soft: rgba(194, 182, 255, 0.08); |
| --best: #4ade80; |
| --worst: #454548; |
| } |
| * { margin: 0; padding: 0; box-sizing: border-box; } |
/* Animate in-page anchor jumps only for users who have not asked the OS/browser
   to reduce motion; everyone else gets the default instant jump. */
@media (prefers-reduced-motion: no-preference) {
  html { scroll-behavior: smooth; }
}
| body { |
| font-family: 'DM Sans', system-ui, -apple-system, sans-serif; |
| background: var(--bg); |
| color: var(--text); |
| line-height: 1.6; |
| -webkit-font-smoothing: antialiased; |
| } |
| |
| .container { max-width: 1100px; margin: 0 auto; padding: 0 40px; } |
| |
| |
| .hero { padding: 80px 0 60px; text-align: center; } |
| .hero h1 { |
| font-size: clamp(44px, 7vw, 80px); |
| font-weight: 300; |
| letter-spacing: -3px; |
| line-height: 1.0; |
| } |
| .hero h1 span { color: var(--accent); } |
| .hero-sub { |
| font-size: 17px; |
| color: var(--text-secondary); |
| margin-top: 18px; |
| max-width: 520px; |
| margin-left: auto; |
| margin-right: auto; |
| font-weight: 400; |
| letter-spacing: -0.2px; |
| } |
| |
| .hero-sub a { |
| color: var(--accent); |
| text-decoration: none; |
| border-bottom: 1px solid transparent; |
| transition: border-color 0.15s; |
| } |
| |
| .hero-sub a:hover { |
| border-bottom-color: var(--accent); |
| } |
| .hero-note { |
| margin-top: 40px; |
| background: var(--surface); |
| border: 1px solid var(--border-subtle); |
| border-left: 3px solid var(--accent); |
| border-radius: 0 10px 10px 0; |
| padding: 20px 28px; |
| text-align: left; |
| font-size: 14px; |
| color: var(--text-secondary); |
| line-height: 1.7; |
| } |
| .hero-note strong { color: var(--text); font-weight: 600; } |
| .hero-note a { |
| color: var(--accent); |
| text-decoration: none; |
| border-bottom: 1px solid transparent; |
| transition: border-color 0.15s; |
| } |
| .hero-note a:hover { border-bottom-color: var(--accent); } |
| |
| |
| .section-title { |
| font-size: clamp(36px, 5vw, 56px); |
| font-weight: 300; |
| letter-spacing: -2px; |
| line-height: 1.05; |
| margin-bottom: 8px; |
| } |
| .section-sub { |
| font-size: 14px; |
| color: var(--text-secondary); |
| margin-bottom: 24px; |
| font-weight: 400; |
| } |
| |
| |
| .insight-grid { |
| display: grid; |
| grid-template-columns: repeat(4, minmax(0, 1fr)); |
| gap: 12px; |
| } |
| .insight-card { |
| min-height: 118px; |
| background: var(--surface); |
| border: 1px solid var(--border-subtle); |
| border-top: 2px solid var(--border-strong); |
| border-radius: 8px; |
| padding: 16px; |
| } |
| .insight-card:hover { |
| background: var(--surface-hover); |
| border-color: var(--border); |
| } |
| .insight-label { |
| display: block; |
| font-family: 'Space Mono', monospace; |
| font-size: 10px; |
| letter-spacing: 0.08em; |
| text-transform: uppercase; |
| color: var(--text-muted); |
| margin-bottom: 10px; |
| } |
| .insight-value { |
| display: block; |
| color: var(--text); |
| font-size: 20px; |
| font-weight: 500; |
| line-height: 1.1; |
| letter-spacing: -0.4px; |
| overflow-wrap: anywhere; |
| } |
| a.insight-value { |
| text-decoration: none; |
| transition: color 0.15s; |
| } |
| a.insight-value:hover { |
| color: var(--accent); |
| } |
| .insight-value.stat { |
| font-family: 'Space Mono', monospace; |
| font-size: 28px; |
| letter-spacing: -1px; |
| } |
| .insight-score { |
| display: block; |
| margin-top: 10px; |
| color: var(--best); |
| font-family: 'Space Mono', monospace; |
| font-size: 12px; |
| font-weight: 700; |
| } |
| .insight-meta { |
| display: block; |
| margin-top: 4px; |
| color: var(--text-muted); |
| font-size: 11px; |
| line-height: 1.4; |
| } |
| .insight-meta a { |
| color: var(--text-muted); |
| text-decoration: none; |
| border-bottom: 1px solid transparent; |
| } |
| .insight-meta a:hover { |
| color: var(--accent-muted); |
| border-bottom-color: var(--accent-muted); |
| } |
| |
| |
| .filter-bar { |
| display: flex; |
| gap: 8px; |
| margin-bottom: 12px; |
| flex-wrap: wrap; |
| } |
| .filter-group { |
| margin-bottom: 18px; |
| } |
| .filter-label { |
| display: block; |
| font-family: 'Space Mono', monospace; |
| font-size: 10px; |
| letter-spacing: 0.08em; |
| text-transform: uppercase; |
| color: var(--text-muted); |
| margin-bottom: 8px; |
| } |
| .filter-btn { |
| font-family: 'Space Mono', monospace; |
| font-size: 11px; |
| font-weight: 400; |
| letter-spacing: 0.02em; |
| padding: 5px 12px; |
| border-radius: 6px; |
| border: 1px solid var(--border); |
| background: transparent; |
| color: var(--text-secondary); |
| cursor: pointer; |
| transition: all 0.15s; |
| } |
| .filter-btn:hover { |
| border-color: var(--accent-muted); |
| color: var(--text); |
| } |
| .filter-btn.active { |
| border-color: var(--accent); |
| background: var(--accent-soft); |
| color: var(--accent); |
| } |
| |
| |
| .table-wrap { |
| overflow-x: auto; |
| border: 1px solid var(--border-subtle); |
| border-radius: 10px; |
| background: var(--surface); |
| } |
| table { width: 100%; border-collapse: collapse; font-size: 14px; } |
/* Header cells shared by every table on the page. */
thead th {
  padding: 12px 12px;
  text-align: center;
  font-size: 10px;
  font-weight: 600;
  letter-spacing: 0.08em;
  text-transform: uppercase;
  color: var(--text-muted);
  border-bottom: 1px solid var(--border);
  white-space: nowrap;
  transition: color 0.15s;
  font-family: 'Space Mono', monospace;
}
/* Click affordances (pointer, hover colour, no text selection) only on headers
   that carry an inline onclick sort handler, so static headers do not look
   clickable. */
thead th[onclick] {
  cursor: pointer;
  user-select: none;
}
thead th[onclick]:hover { color: var(--accent); }
thead th:first-child, tbody td:first-child { text-align: left; }
/* The indicator is always in the layout but invisible until its column is sorted. */
thead th .sort-indicator {
  margin-left: 3px;
  opacity: 0;
  font-size: 9px;
  color: var(--accent);
}
thead th.sorted { color: var(--accent); }
thead th.sorted .sort-indicator { opacity: 1; }
| tbody tr { |
| border-bottom: 1px solid var(--border-subtle); |
| transition: background 0.12s; |
| } |
| tbody tr:last-child { border-bottom: none; } |
| tbody tr:hover { background: var(--accent-soft); } |
| tbody td { |
| padding: 12px 12px; |
| text-align: center; |
| vertical-align: middle; |
| } |
| .td-rank { |
| font-weight: 700; |
| font-size: 12px; |
| color: var(--accent-muted); |
| width: 32px; |
| font-family: 'Space Mono', monospace; |
| } |
| .org-title-line { |
| display: inline-flex; |
| align-items: center; |
| gap: 8px; |
| } |
| .rank-move { |
| display: inline-flex; |
| align-items: center; |
| justify-content: center; |
| min-width: 30px; |
| height: 17px; |
| padding: 0 5px; |
| border-radius: 4px; |
| border: 1px solid var(--border-subtle); |
| color: var(--text-muted); |
| background: rgba(255,255,255,0.03); |
| font-size: 9px; |
| line-height: 1; |
| } |
| .rank-move.up { |
| color: var(--best); |
| border-color: rgba(74,222,128,0.28); |
| background: rgba(74,222,128,0.08); |
| } |
| .rank-move.down { |
| color: #f87171; |
| border-color: rgba(248,113,113,0.26); |
| background: rgba(248,113,113,0.07); |
| } |
| .rank-move.new { |
| color: var(--accent-muted); |
| border-color: rgba(194,182,255,0.30); |
| background: rgba(194,182,255,0.08); |
| } |
| .td-model { |
| text-align: left; |
| font-weight: 500; |
| font-size: 14px; |
| color: var(--text); |
| } |
| .td-model a { |
| color: var(--text); |
| text-decoration: none; |
| transition: color 0.15s; |
| } |
| .td-model a:hover { color: var(--accent); } |
| .td-model .model-org { |
| display: block; |
| font-size: 11px; |
| color: var(--text-muted); |
| font-weight: 400; |
| margin-top: 1px; |
| } |
| .td-model .model-org a { |
| color: var(--text-muted); |
| text-decoration: none; |
| transition: color 0.15s, border-color 0.15s; |
| border-bottom: 1px solid transparent; |
| } |
| .td-model .model-org a:hover { |
| color: var(--accent-muted); |
| border-bottom-color: var(--accent-muted); |
| } |
| .td-score { |
| font-family: 'Space Mono', monospace; |
| font-size: 12px; |
| letter-spacing: -0.2px; |
| } |
| .td-score.best { |
| color: var(--best); |
| font-weight: 700; |
| } |
| .td-score.na { |
| color: var(--text-muted); |
| font-style: normal; |
| } |
| .td-params { |
| font-family: 'Space Mono', monospace; |
| font-size: 11px; |
| color: var(--text-secondary); |
| } |
| .org-badge { |
| display: inline-block; |
| font-size: 10px; |
| font-weight: 600; |
| padding: 2px 7px; |
| border-radius: 4px; |
| letter-spacing: 0.03em; |
| font-family: 'Space Mono', monospace; |
| } |
| |
| |
| .legend-bar { |
| display: flex; |
| gap: 20px; |
| margin-bottom: 24px; |
| flex-wrap: wrap; |
| } |
| .legend-item { |
| display: flex; |
| align-items: center; |
| gap: 6px; |
| font-size: 12px; |
| color: var(--text-secondary); |
| } |
| .legend-dot { width: 8px; height: 8px; border-radius: 50%; } |
| .chart-card { |
| background: var(--surface); |
| border: 1px solid var(--border-subtle); |
| border-radius: 10px; |
| padding: 20px; |
| } |
| .chart-card h3 { |
| font-size: 12px; |
| font-weight: 500; |
| color: var(--text-secondary); |
| margin-bottom: 14px; |
| letter-spacing: 0.02em; |
| } |
| .chart-card canvas { |
| max-height: 460px; |
| } |
| |
| .about-box { |
| background: var(--surface); |
| border: 1px solid var(--border-subtle); |
| border-left: 3px solid var(--accent); |
| border-radius: 0 10px 10px 0; |
| padding: 28px 32px; |
| } |
| .about-box h3 { |
| font-size: 20px; |
| font-weight: 500; |
| margin-bottom: 10px; |
| color: var(--text); |
| letter-spacing: -0.5px; |
| } |
| .about-box p { |
| font-size: 14px; |
| color: var(--text-secondary); |
| line-height: 1.7; |
| } |
| .about-box a { |
| color: var(--accent); |
| text-decoration: none; |
| border-bottom: 1px solid transparent; |
| transition: border-color 0.15s; |
| } |
| .about-box a:hover { border-bottom-color: var(--accent); } |
| |
| |
| footer { |
| margin-top: 100px; |
| padding: 32px 0; |
| border-top: 1px solid var(--border-subtle); |
| display: flex; |
| justify-content: space-between; |
| align-items: center; |
| font-size: 11px; |
| color: var(--text-muted); |
| font-family: 'Space Mono', monospace; |
| } |
| footer a { |
| color: var(--text-secondary); |
| text-decoration: none; |
| margin-left: 16px; |
| transition: color 0.15s; |
| } |
| footer a:hover { color: var(--accent); } |
| |
| |
| @media (max-width: 768px) { |
| .container { padding: 0 20px; } |
| .hero { padding: 60px 0 40px; } |
| .insight-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); } |
| .chart-card canvas { max-height: 360px; } |
| footer { flex-direction: column; gap: 12px; text-align: center; } |
| footer a { margin: 0 8px; } |
| } |
| @media (max-width: 520px) { |
| .insight-grid { grid-template-columns: 1fr; } |
| } |
| </style> |
| <base target="_blank"> |
| </head> |
| <body> |
|
|
| <div class="container"> |
|
|
| |
<!-- Page title and one-sentence description of what the leaderboard measures. -->
<section class="hero">
  <h1>Open <span>SLM Leaderboard</span></h1>
  <p class="hero-sub">
    A leaderboard for sub-150M parameter language models, evaluated using the LM-eval harness
    or a custom benchmark script available at
    <a href="https://huggingface.co/datasets/axiomiclabs/Arithmark-2.0" target="_blank" rel="noopener noreferrer">Arithmark-2.0</a>.
  </p>
</section>
|
|
| |
<!-- Highlight cards; the grid is empty in the markup (presumably populated by script - confirm). -->
<section id="highlights" style="padding-top:20px;">
  <div class="insight-grid" id="insight-grid"></div>
</section>
|
|
| |
<!-- Main results table. The filter bar and table body are empty in the markup
     (presumably populated by script - confirm). -->
<section id="leaderboard" style="padding-top:40px;">
  <h2 class="section-title">Leaderboard</h2>
  <p class="section-sub">Zero-shot evaluation. Higher is better for all columns. Click any header to sort.</p>
  <div class="filter-group">
    <span class="filter-label">Model size</span>
    <div class="filter-bar" id="filter-bar"></div>
  </div>
  <div class="table-wrap">
    <table id="leaderboard-table">
      <thead>
        <!-- Each header passes its column key to sortTable(), which is not defined in this section. -->
        <tr>
          <th scope="col" onclick="sortTable('rank')"># <span class="sort-indicator">▼</span></th>
          <th scope="col" onclick="sortTable('name')">Model <span class="sort-indicator"></span></th>
          <th scope="col" onclick="sortTable('params')">Params <span class="sort-indicator"></span></th>
          <th scope="col" onclick="sortTable('avg')">Avg <span class="sort-indicator"></span></th>
          <th scope="col" onclick="sortTable('hellaswag')">HellaSwag <span class="sort-indicator"></span></th>
          <th scope="col" onclick="sortTable('arc')">ARC-Easy <span class="sort-indicator"></span></th>
          <th scope="col" onclick="sortTable('arcChall')">ARC-Challenge <span class="sort-indicator"></span></th>
          <th scope="col" onclick="sortTable('piqa')">PIQA <span class="sort-indicator"></span></th>
          <th scope="col" onclick="sortTable('arithmark2')">ArithMark-2 <span class="sort-indicator"></span></th>
        </tr>
      </thead>
      <tbody id="leaderboard-body"></tbody>
    </table>
  </div>
</section>
|
|
| |
<!-- Bar chart of top scores, with a benchmark selector above the canvas. -->
<section id="score-chart" style="padding-top:80px;">
  <h2 class="section-title">Scores</h2>
  <p class="section-sub" id="score-chart-sub">Top scores for the active size and benchmark filters.</p>

  <div class="filter-group">
    <span class="filter-label">Benchmark</span>
    <div class="filter-bar" id="benchmark-filter-bar"></div>
  </div>

  <div class="chart-card">
    <h3>Top Avg Scores</h3>
    <canvas id="barChart"></canvas>
  </div>
</section>
|
|
| |
<!-- Scatter chart of score against parameter count, with a legend container above it. -->
<section id="charts" style="padding-top:80px;">
  <h2 class="section-title">Efficiency</h2>
  <p class="section-sub" id="efficiency-sub">Score vs parameter count (log scale). Shaded zone = above regression line.</p>

  <div class="legend-bar" id="legend-bar"></div>

  <div class="chart-card">
    <h3>Avg Score vs Log Parameters</h3>
    <canvas id="scatterChart"></canvas>
  </div>
</section>
|
|
| |
<!-- Per-organization ranking table; the body is empty in the markup
     (presumably populated by script - confirm). -->
<section id="org-performance" style="padding-top:80px;">
  <h2 class="section-title">Org Leaderboard</h2>
  <p class="section-sub" id="org-leaderboard-sub">Average standard deviations above or below the score-vs-size fit line.</p>
  <div class="table-wrap">
    <table id="org-leaderboard-table">
      <thead>
        <tr>
          <th scope="col">#</th>
          <th scope="col">Organization</th>
          <th scope="col">Models</th>
          <th scope="col">Fit Std Devs</th>
          <th scope="col">Mean Avg</th>
          <th scope="col">Best Model vs Fit</th>
        </tr>
      </thead>
      <tbody id="org-leaderboard-body"></tbody>
    </table>
  </div>
</section>
|
|
| |
<!-- Submission instructions. -->
<section id="about" style="padding-top:80px;">
  <div class="about-box">
    <h3>Add your model</h3>
    <p>Open a PR on this Space with your model's results for the given benchmarks. They will be independently verified by our team and then your PR will be merged. Your model must be open weights to qualify. <a href="https://huggingface.co/spaces/AxiomicLabs/Open_SLM_Leaderboard/discussions" target="_blank" rel="noopener noreferrer">Open a PR →</a></p>
  </div>
</section>
|
|
| |
<footer>
  <div>Open SLM Leaderboard by <a href="https://huggingface.co/AxiomicLabs" style="margin:0;" rel="noopener noreferrer">Axiomic Labs</a></div>
  <!-- target="_self" is required here: the document-wide <base target="_blank">
       would otherwise open these same-page anchors in a new tab instead of scrolling. -->
  <nav aria-label="Page sections">
    <a href="#leaderboard" target="_self">Leaderboard</a>
    <a href="#score-chart" target="_self">Scores</a>
    <a href="#charts" target="_self">Efficiency</a>
    <a href="#org-performance" target="_self">Organizations</a>
  </nav>
  <div style="font-size:10px;">All results independently verified using our internal verification process.</div>
</footer>
|
|
| </div> |
|
|
| <script> |
| |
| |
| |
| |
| const ORGS = { |
| compactai: { name: 'CompactAI', chartColor: 'rgba(63, 185, 80, 0.70)', chartBorder: '#3fb950', url: 'https://huggingface.co/CompactAI-O' }, |
| supralabs: { name: 'SupraLabs', chartColor: 'rgba(124, 58, 237, 0.70)', chartBorder: '#7c3aed', url: 'https://huggingface.co/SupraLabs' }, |
| axiomiclabs: { name: 'Axiomic Labs', chartColor: 'rgba(194, 182, 255, 0.70)', chartBorder: '#c2b6ff', url: 'https://huggingface.co/AxiomicLabs' }, |
| mihaipopa: { name: 'Mihai Popa', chartColor: 'rgba(247, 129, 102, 0.70)', chartBorder: '#f78166', url: 'https://huggingface.co/MihaiPopa-1' }, |
| lhtechai: { name: 'LH-Tech-AI', chartColor: 'rgba(249, 115, 22, 0.70)', chartBorder: '#f97316', url: 'https://huggingface.co/LH-Tech-AI' }, |
| facebook: { name: 'Facebook', chartColor: 'rgba(24, 119, 242, 0.70)', chartBorder: '#1877f2', url: 'https://huggingface.co/facebook' }, |
| harleyml: { name: 'Harley ML', chartColor: 'rgba(153, 27, 27, 0.70)', chartBorder: '#991b1b', url: 'https://huggingface.co/Harley-ml' }, |
| huggingface: { name: 'HuggingFace', chartColor: 'rgba(255, 204, 0, 0.70)', chartBorder: '#ffcc00', url: 'https://huggingface.co/HuggingFaceTB' }, |
| eleutherai: { name: 'EleutherAI', chartColor: 'rgba(239, 68, 68, 0.70)', chartBorder: '#ef4444', url: 'https://huggingface.co/EleutherAI' }, |
| openai: { name: 'OpenAI', chartColor: 'rgba(16, 163, 127, 0.70)', chartBorder: '#10a37f', url: 'https://huggingface.co/openai-community' }, |
| stentor: { name: 'StentorLabs', chartColor: 'rgba(255, 107, 203, 0.70)', chartBorder: '#ff6bcb', url: 'https://huggingface.co/StentorLabs' }, |
| eclipsesenpai: { name: 'Eclipse-Senpai', chartColor: 'rgba(6, 182, 212, 0.70)', chartBorder: '#06b6d4', url: 'https://huggingface.co/Eclipse-Senpai' }, |
| godelev: { name: 'GODELEV', chartColor: 'rgba(79, 70, 229, 0.70)', chartBorder: '#4f46e5', url: 'https://huggingface.co/godelev' }, |
| sandroeth: { name: 'Sandroeth', chartColor: 'rgba(132, 204, 22, 0.70)', chartBorder: '#84cc16', url: 'https://huggingface.co/Sandroeth' }, |
| veyraai: { name: 'veyra-ai', chartColor: 'rgba(14, 165, 233, 0.70)', chartBorder: '#0ea5e9', url: 'https://huggingface.co/veyra-ai' }, |
| thingai: { name: 'ThingAI', chartColor: 'rgba(180, 83, 9, 0.70)', chartBorder: '#b45309', url: 'https://huggingface.co/ThingAI' }, |
| fromzero: { name: 'FromZero', chartColor: 'rgba(210, 180, 140, 0.70)', chartBorder: '#d2b48c', url: 'https://huggingface.co/fromziro' }, |
| finnianx: { name: 'finnianx', chartColor: 'rgba(45, 212, 191, 0.70)', chartBorder: '#2dd4bf', url: 'https://huggingface.co/finnianx' }, |
| joelhenwang: { name: 'joelhenwang', chartColor: 'rgba(229, 231, 235, 0.70)', chartBorder: '#e5e7eb', url: 'https://huggingface.co/joelhenwang' }, |
| rtc2022: { name: 'RTC', chartColor: 'rgba(245, 158, 11, 0.70)', chartBorder: '#f59e0b', url: 'https://huggingface.co/rtc2022' }, |
| }; |
| |
| const MODELS = [ |
| |
| { name: 'SmolLM2-135M', org: 'huggingface', params: 135000000, paramsDisplay: '135M', arc: 58.63, hellaswag: 43.22, piqa: 68.44, arcChall: 29.69, arithmark2: 32.68, links: { card: 'https://huggingface.co/HuggingFaceTB/SmolLM2-135M' } }, |
| { name: 'GPT-X2-125M', org: 'axiomiclabs', params: 125000000, paramsDisplay: '125M', arc: 51.47, hellaswag: 40.41, piqa: 67.30, arcChall: 27.82, arithmark2: 30.68, links: { card: 'https://huggingface.co/AxiomicLabs/GPT-X2-125M' } }, |
| { name: 'GPT-X-125M', org: 'axiomiclabs', params: 125000000, paramsDisplay: '125M', arc: 50.76, hellaswag: 36.57, piqa: 64.96, arcChall: 26.62, arithmark2: 30.24, links: { card: 'https://huggingface.co/AxiomicLabs/GPT-X-125M' } }, |
| { name: 'MobileLLM-R1-140M-base', org: 'facebook', params: 140000000, paramsDisplay: '140M', arc: 49.92, hellaswag: 33.84, piqa: 63.22, arcChall: 24.74, arithmark2: 53.56, links: { card: 'https://huggingface.co/facebook/MobileLLM-R1-140M-base' } }, |
| { name: 'Supra-50M-Base', org: 'supralabs', params: 51786240, paramsDisplay: '52M', arc: 45.88, hellaswag: 31.83, piqa: 62.51, arcChall: 25.00, arithmark2: 27.04, links: { card: 'https://huggingface.co/SupraLabs/Supra-50M-Base' } }, |
| { name: 'Shard-1', org: 'compactai', params: 54500000, paramsDisplay: '54.5M', arc: 41.12, hellaswag: 29.20, piqa: 58.22, arcChall: 20.99, arithmark2: 26.80, links: { card: 'https://huggingface.co/CompactAI-O/Shard-1' } }, |
| { name: 'Supra-50M-Instruct', org: 'supralabs', params: 51786240, paramsDisplay: '52M', arc: 44.40, hellaswag: 29.09, piqa: 59.47, arcChall: 27.30, arithmark2: 29.12, links: { card: 'https://huggingface.co/SupraLabs/Supra-50M-Instruct' } }, |
| { name: 'Supra-50M-Reasoning', org: 'supralabs', params: 51786240, paramsDisplay: '52M', arc: 44.44, hellaswag: 29.10, piqa: 59.30, arcChall: 27.39, arithmark2: 28.96, links: { card: 'https://huggingface.co/SupraLabs/Supra-50M-Reasoning' } }, |
| { name: 'SmolLM-135M', org: 'huggingface', params: 135000000, paramsDisplay: '135M', arc: 56.31, hellaswag: 42.70, piqa: 68.28, arcChall: 29.01, arithmark2: 28.84, links: { card: 'https://huggingface.co/HuggingFaceTB/SmolLM-135M' } }, |
| { name: 'OPT-125M', org: 'facebook', params: 125000000, paramsDisplay: '125M', arc: 40.28, hellaswag: 31.31, piqa: 62.24, arcChall: 22.70, arithmark2: 24.40, links: { card: 'https://huggingface.co/facebook/opt-125m' } }, |
| { name: 'GPT-S-5M', org: 'axiomiclabs', params: 5160000, paramsDisplay: '5.2M', arc: 33.21, hellaswag: 27.46, piqa: 57.24, arcChall: 21.16, arithmark2: 27.12, links: { card: 'https://huggingface.co/AxiomicLabs/GPT-S-5M' } }, |
| { name: 'GPT-2', org: 'openai', params: 124000000, paramsDisplay: '124M', arc: 39.35, hellaswag: 31.26, piqa: 62.08, arcChall: 22.35, arithmark2: 26.48, links: { card: 'https://huggingface.co/openai-community/gpt2' } }, |
| { name: 'Spark-5M-Base-v4', org: 'lhtechai', params: 5000000, paramsDisplay: '5M', arc: 33.16, hellaswag: 27.03, piqa: 53.32, arcChall: 21.50, arithmark2: 25.00, links: { card: 'https://huggingface.co/LH-Tech-AI/Spark-5M-Base-v4' } }, |
| { name: 'Supra-Mini-v5-8M', org: 'supralabs', params: 7870000, paramsDisplay: '7.87M', arc: 33.21, hellaswag: 26.37, piqa: 54.03, arcChall: 21.16, arithmark2: 24.28, links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v5-8M' } }, |
| { name: 'Pythia-70M', org: 'eleutherai', params: 70000000, paramsDisplay: '70M', arc: 31.65, hellaswag: 27.49, piqa: 53.48, arcChall: 23.63, arithmark2: 25.32, links: { card: 'https://huggingface.co/EleutherAI/pythia-70m' } }, |
| { name: 'Supra-Mini-v4-2M', org: 'supralabs', params: 2620000, paramsDisplay: '2.6M', arc: 30.98, hellaswag: 25.52, piqa: 51.90, arcChall: 21.50, arithmark2: 24.08, links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v4-2M' } }, |
| { name: 'Pythia-31M', org: 'eleutherai', params: 31000000, paramsDisplay: '31M', arc: 33.88, hellaswag: 27.14, piqa: 56.26, arcChall: 21.67, arithmark2: 27.20, links: { card: 'https://huggingface.co/EleutherAI/pythia-31m' } }, |
| { name: 'Stentor3-50M', org: 'stentor', params: 50000000, paramsDisplay: '50M', arc: 29.67, hellaswag: 27.10, piqa: 53.75, arcChall: 21.67, arithmark2: 29.48, links: { card: 'https://huggingface.co/StentorLabs/Stentor3-50M' } }, |
| { name: 'Stentor3-20M', org: 'stentor', params: 20000000, paramsDisplay: '20M', arc: 29.50, hellaswag: 27.06, piqa: 55.06, arcChall: 23.12, arithmark2: 26.72, links: { card: 'https://huggingface.co/StentorLabs/Stentor3-20M' } }, |
| { name: 'Portimbria-150M', org: 'stentor', params: 151026432, paramsDisplay: '151M', arc: 35.82, hellaswag: 27.09, piqa: 58.27, arcChall: 18.77, arithmark2: 28.04, links: { card: 'https://huggingface.co/StentorLabs/Portimbria-150M' } }, |
| { name: 'nanowhale-100m-base', org: 'huggingface', params: 100000000, paramsDisplay: '100M', arc: 28.79, hellaswag: 26.31, piqa: 51.80, arcChall: 24.83, arithmark2: 25.20, links: { card: 'https://huggingface.co/HuggingFaceTB/nanowhale-100m-base' } }, |
| { name: 'Pythia-14M', org: 'eleutherai', params: 14000000, paramsDisplay: '14M', arc: 32.28, hellaswag: 26.20, piqa: 55.88, arcChall: 20.99, arithmark2: 27.04, links: { card: 'https://huggingface.co/EleutherAI/pythia-14m' } }, |
| { name: 'Tenete-8M', org: 'harleyml', params: 8000000, paramsDisplay: '8M', arc: 31.69, hellaswag: 26.75, piqa: 55.66, arcChall: 21.84, arithmark2: 26.72, links: { card: 'https://huggingface.co/Harley-ml/Tenete-8M' } }, |
| { name: 'Dillion-1.2M', org: 'harleyml', params: 1281384, paramsDisplay: '1.3M', arc: 31.19, hellaswag: 26.65, piqa: 53.05, arcChall: 22.78, arithmark2: 24.80, links: { card: 'https://huggingface.co/Harley-ml/Dillion-1.2M' } }, |
| { name: 'CinnabarLM-1.4M-Base', org: 'mihaipopa', params: 1510000, paramsDisplay: '1.5M', arc: 28.54, hellaswag: 27.08, piqa: 52.50, arcChall: 23.38, arithmark2: 24.96, links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.4M-Base' } }, |
| { name: 'CinnabarLM-4M-Base', org: 'mihaipopa', params: 4230000, paramsDisplay: '4.2M', arc: 28.28, hellaswag: 27.71, piqa: 52.29, arcChall: 22.70, arithmark2: 24.96, links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-4M-Base' } }, |
| { name: 'CinnabarLM-1.5M-Base', org: 'mihaipopa', params: 1710000, paramsDisplay: '1.7M', arc: 28.11, hellaswag: 27.08, piqa: 52.94, arcChall: 21.93, arithmark2: 25.20, links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.5M-Base' } }, |
| { name: 'Dillionv2-1.3M', org:'harleyml', params: 1285200, paramsDisplay: '1.3M', arc: 29.71, hellaswag: 27.27, piqa: 53.05, arcChall: 22.44, arithmark2: 27.00, links: { card: 'https://huggingface.co/Harley-ml/Dillionv2-1.3M' } }, |
| { name: 'KeyLM-75M', org:'eclipsesenpai',params: 75251200, paramsDisplay: '75M', arc: 35.73, hellaswag: 29.66, piqa: 60.50, arcChall: 23.98, arithmark2: 25.80, links: { card: 'https://huggingface.co/Eclipse-Senpai/KeyLM-75M' } }, |
| { name: 'Supra-Mini-v6-1M', org: 'supralabs', params: 1410688, paramsDisplay: '1.4M', arc: 30.68, hellaswag: 27.23, piqa: 53.70, arcChall: 20.48, arithmark2: 26.48, links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v6-1M' } }, |
| { name: 'GPT-S-1.4M', org: 'axiomiclabs', params: 1426000, paramsDisplay: '1.4M', arc: 31.57, hellaswag: 26.89, piqa: 55.17, arcChall: 21.93, arithmark2: 25.16, links: { card: 'https://huggingface.co/AxiomicLabs/GPT-S-1.4M' } }, |
| { name: 'Archaea-74M', org: 'godelev', params: 74016256, paramsDisplay: '74M', arc: 39.06, hellaswag: 27.27, piqa: 58.27, arcChall: 22.70, arithmark2: 29.20, links: { card: 'https://huggingface.co/GODELEV/Archaea-74M' } }, |
| { name: 'Cali-0.1B', org: 'sandroeth', params: 123782400, paramsDisplay: '124M', arc: 27.53, hellaswag: 26.84, piqa: 52.12, arcChall: 24.49, arithmark2: 24.72, links: { card: 'https://huggingface.co/Sandroeth/cali-0.1B' } }, |
| { name: 'Quark-50M', org: 'thingai', params: 56666496, paramsDisplay: '57M', arc: 36.78, hellaswag: 28.48, piqa: 57.83, arcChall: 25.00, arithmark2: 28.20, links: { card: 'https://huggingface.co/ThingAI/Quark-50m' } }, |
| { name: 'Quark-135M', org: 'thingai', params: 134561088, paramsDisplay: '135M', arc: 47.73, hellaswag: 31.33, piqa: 58.32, arcChall: 28.24, arithmark2: 40.32, links: { card: 'https://huggingface.co/ThingAI/Quark-135m' } }, |
| { name: 'Veyra-30M-Base', org: 'veyraai', params: 34611712, paramsDisplay: '35M', arc: 35.90, hellaswag: 27.92, piqa: 58.92, arcChall: 24.15, arithmark2: 26.76, links: { card: 'https://huggingface.co/veyra-ai/veyra-30m-base-5b-tokens' } }, |
| { name: 'Syn-2.6M', org: 'fromzero', params: 2604210, paramsDisplay: '2.6M', arc: 32.03, hellaswag: 26.96, piqa: 53.65, arcChall: 20.39, arithmark2: 26.68, links: { card: 'https://huggingface.co/fromziro/Syn-2.6M' } }, |
| { type: 'orgMovementCutoff', label: 'Org leaderboard movement cutoff' }, |
| { name: 'Ant-5M', org: 'godelev', params: 4713344, paramsDisplay: '4.7M', arc: 26.35, hellaswag: 25.99, piqa: 48.57, arcChall: 25.77, arithmark2: 24.80, links: { card: 'https://huggingface.co/GODELEV/Ant-5m' } }, |
| { name: 'Er-13M', org: 'fromzero', params: 12497520, paramsDisplay: '13M', arc: 35.10, hellaswag: 28.50, piqa: 57.51, arcChall: 20.73, arithmark2: 30.88, links: { card: 'https://huggingface.co/fromziro/Er-13M' } }, |
| { name: 'michel-tiny', org: 'finnianx', params: 55719040, paramsDisplay: '56M', arc: 37.37, hellaswag: 28.15, piqa: 57.34, arcChall: 21.76, arithmark2: 25.28, links: { card: 'https://huggingface.co/finnianx/michel-tiny' } }, |
| { name: 'OdinNext-138M-Base', org: 'joelhenwang', params: 138449696, paramsDisplay: '138M', arc: 45.08, hellaswag: 28.09, piqa: 59.52, arcChall: 23.81, arithmark2: 36.84, links: { card: 'https://huggingface.co/joelhenwang/OdinNext-138M-Base' } }, |
| { name: 'OdinNext-138M-Instruct', org: 'joelhenwang', params: 138451232, paramsDisplay: '138M', arc: 44.40, hellaswag: 28.86, piqa: 58.65, arcChall: 23.12, arithmark2: 36.56, links: { card: 'https://huggingface.co/joelhenwang/OdinNext-138M-Instruct' } }, |
| { name: 'michel-micro', org: 'finnianx', params: 28355072, paramsDisplay: '28M', arc: 38.51, hellaswag: 28.16, piqa: 57.62, arcChall: 23.29, arithmark2: 26.04, links: { card: 'https://huggingface.co/finnianx/michel-micro' } }, |
| { name: 'kirk-tung', org: 'rtc2022', params: 53111296, paramsDisplay: '53M', arc: 30.43, hellaswag: 26.32, piqa: 52.61, arcChall: 22.01, arithmark2: 24.88, links: { card: 'https://huggingface.co/rtc2022/kirk-tung' } }, |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| ]; |
| |
| |
| |
| |
| |
| |
// ---- Static configuration -------------------------------------------------

// Score field names carried by each MODELS entry.
const BENCHMARKS = [
  'arc',
  'hellaswag',
  'piqa',
  'arcChall',
  'arithmark2',
];

// Metric descriptors: `key` is the lookup key, `label` the short name,
// `fullLabel` the long name (see getMetricLabel).
const METRICS = [
  { key: 'avg',        label: 'Avg',           fullLabel: 'Average Score' },
  { key: 'hellaswag',  label: 'HellaSwag',     fullLabel: 'HellaSwag' },
  { key: 'arc',        label: 'ARC-Easy',      fullLabel: 'ARC-Easy' },
  { key: 'arcChall',   label: 'ARC-Challenge', fullLabel: 'ARC-Challenge' },
  { key: 'piqa',       label: 'PIQA',          fullLabel: 'PIQA' },
  { key: 'arithmark2', label: 'ArithMark-2',   fullLabel: 'ArithMark-2' },
];

// Per-metric baseline scores (presumably chance-level accuracy - confirm).
// 31.25 equals the mean of 25, 25, 50 and 25.
const RANDOM_BASELINES = {
  avg: 31.25,
  hellaswag: 25,
  arc: 25,
  arcChall: 25,
  piqa: 50,
  arithmark2: 25
};

// `type` value of the marker entry inside MODELS that getOrgMovementBaselineModels looks for.
const ORG_MOVEMENT_CUTOFF_TYPE = 'orgMovementCutoff';

// ---- Mutable UI state -----------------------------------------------------

let sortCol = 'avg';          // column key used by getSortedModels
let sortAsc = false;          // false = descending
let activeFilter = 'all';     // size filter; getFilteredModels recognises '<10M', '<50M', '<100M'
let activeBenchmark = 'avg';  // default metric for getMetricValue / getMetricLabel
let chartInstances = {};      // presumably holds created chart objects - confirm against the chart code
| |
/**
 * Computes a model's headline average.
 *
 * ARC-Easy and ARC-Challenge are folded into a single component (their mean
 * when both exist, otherwise whichever one is present), so ARC contributes at
 * most once. HellaSwag, PIQA and ArithMark-2 each contribute one component.
 *
 * @param {object} m - Model entry; score fields may be missing or null.
 * @returns {number|null} Mean of the available components, or null when
 *   fewer than two components are available.
 */
function getAvg(m) {
  const isPresent = (score) => score !== null && score !== undefined;
  const parts = [];

  if (isPresent(m.hellaswag)) parts.push(m.hellaswag);

  const hasEasy = isPresent(m.arc);
  const hasChallenge = isPresent(m.arcChall);
  if (hasEasy && hasChallenge) {
    parts.push((m.arc + m.arcChall) / 2);
  } else if (hasEasy) {
    parts.push(m.arc);
  } else if (hasChallenge) {
    parts.push(m.arcChall);
  }

  if (isPresent(m.piqa)) parts.push(m.piqa);
  if (isPresent(m.arithmark2)) parts.push(m.arithmark2);

  // A single component is not treated as an average.
  if (parts.length < 2) return null;

  let total = 0;
  for (const part of parts) total += part;
  return total / parts.length;
}
| |
/**
 * Maps a model to the value used for sorting on a given column.
 *
 * @param {object} m - Model entry.
 * @param {string} col - Column key: 'avg', 'params', 'rank', or any field name on the model.
 * @returns {*} The sort key. Missing values become -Infinity (0 for 'params');
 *   'rank' is the negated average.
 */
function getSortVal(m, col) {
  switch (col) {
    case 'avg':
      return getMetricValue(m, 'avg') ?? -Infinity;
    case 'params':
      return m.params ?? 0;
    case 'rank':
      // Negated average: a higher average yields a smaller key.
      return -(getMetricValue(m, 'avg') ?? -Infinity);
    default:
      return m[col] ?? -Infinity;
  }
}
| |
/**
 * Reads one metric from a model.
 *
 * @param {object} m - Model entry.
 * @param {string} [metric=activeBenchmark] - 'avg' for the computed average,
 *   otherwise the name of a field on the model.
 * @returns {*} getAvg(m) for 'avg'; otherwise m[metric] (undefined if absent).
 */
function getMetricValue(m, metric = activeBenchmark) {
  if (metric === 'avg') {
    return getAvg(m);
  }
  return m[metric];
}
| |
/**
 * Looks up the display label for a metric key in METRICS.
 *
 * @param {string} [metric=activeBenchmark] - Metric key.
 * @param {boolean} [full=false] - When truthy, return `fullLabel` instead of `label`.
 * @returns {string} The matching label, or the key itself when METRICS has no such entry.
 */
function getMetricLabel(metric = activeBenchmark, full = false) {
  for (const entry of METRICS) {
    if (entry.key === metric) {
      return full ? entry.fullLabel : entry.label;
    }
  }
  return metric;
}
| |
/**
 * Drops every entry that has a truthy `type` (such as the cutoff marker in
 * MODELS) and keeps the rest.
 *
 * @param {object[]} [models=MODELS] - Entries to scan.
 * @returns {object[]} New array holding only entries without a truthy `type`.
 */
function getModelEntries(models = MODELS) {
  const isRealModel = (entry) => !entry.type;
  return models.filter(isRealModel);
}
| |
/**
 * Returns the model entries that pass the active size filter.
 *
 * @param {object[]} [models=MODELS] - Entries to filter; marker entries are removed first.
 * @returns {object[]} All models when `activeFilter` is not one of the
 *   recognised keys; otherwise only models with a truthy `params` strictly
 *   below the selected limit.
 */
function getFilteredModels(models = MODELS) {
  // Upper bounds (exclusive) on parameter count for each filter key.
  const sizeLimits = {
    '<10M': 10000000,
    '<50M': 50000000,
    '<100M': 100000000,
  };

  const entries = getModelEntries(models);
  if (!Object.prototype.hasOwnProperty.call(sizeLimits, activeFilter)) {
    return entries;
  }

  const limit = sizeLimits[activeFilter];
  return entries.filter(m => m.params && m.params < limit);
}
| |
/**
 * Returns the size-filtered models listed before the cutoff marker in MODELS.
 *
 * @returns {object[]} Filtered models preceding the first entry whose `type`
 *   is ORG_MOVEMENT_CUTOFF_TYPE, or all filtered models when no marker exists.
 */
function getOrgMovementBaselineModels() {
  const markerAt = MODELS.findIndex(entry => entry.type === ORG_MOVEMENT_CUTOFF_TYPE);
  if (markerAt === -1) {
    return getFilteredModels(MODELS);
  }
  return getFilteredModels(MODELS.slice(0, markerAt));
}
| |
/**
 * Filtered models that have a value (not null/undefined) for the active benchmark.
 *
 * @returns {Object[]}
 */
function getChartModels() {
  const hasActiveMetric = (m) => {
    const value = getMetricValue(m, activeBenchmark);
    return value !== null && value !== undefined;
  };
  return getFilteredModels().filter(hasActiveMetric);
}
| |
/**
 * Copies of the given models with an `avg` field added, skipping models for
 * which getAvg() yields no score.
 *
 * @param {Object[]} [models=getFilteredModels()]
 * @returns {Object[]} shallow copies, each carrying a non-null `avg`
 */
function getAvgModels(models = getFilteredModels()) {
  const scored = [];
  models.forEach((m) => {
    const avg = getAvg(m);
    if (avg !== null && avg !== undefined) {
      scored.push({ ...m, avg });
    }
  });
  return scored;
}
| |
/**
 * Filtered models ordered by the current table sort (`sortCol` / `sortAsc`).
 *
 * The comparator returns 0 when two sort values are equal or cannot be
 * ordered (for example a string compared with the -Infinity placeholder).
 * The previous comparator never returned 0, which made it inconsistent for
 * ties and left their relative order up to the engine. With a stable sort,
 * tied rows now keep their MODELS order.
 *
 * @returns {Object[]} a new, sorted array
 */
function getSortedModels() {
  const arr = getFilteredModels();
  const direction = sortAsc ? 1 : -1;
  arr.sort((a, b) => {
    const av = getSortVal(a, sortCol);
    const bv = getSortVal(b, sortCol);
    if (av > bv) return direction;
    if (av < bv) return -direction;
    return 0;
  });
  return arr;
}
| |
/**
 * Format a number with two decimals and an explicit '+' for values >= 0.
 *
 * @param {number|null|undefined} value
 * @returns {string} e.g. '+1.23', '-0.50'; 'N/A' for null/undefined
 */
function formatSigned(value) {
  if (value === undefined || value === null) {
    return 'N/A';
  }
  // Negative numbers already carry their own '-' from toFixed().
  const sign = value >= 0 ? '+' : '';
  return sign + value.toFixed(2);
}
| |
/**
 * Highest-scoring filtered model for one metric.
 *
 * @param {string} metric metric key ('avg' or a raw benchmark field)
 * @returns {Object|null} copy of the winning model with a `score` field added,
 *   or null when no filtered model has a value for the metric
 */
function topByMetric(metric) {
  const scored = [];
  getFilteredModels().forEach((m) => {
    const score = getMetricValue(m, metric);
    if (score !== null && score !== undefined) {
      scored.push({ ...m, score });
    }
  });

  scored.sort((left, right) => right.score - left.score);
  return scored.length > 0 ? scored[0] : null;
}
| |
/**
 * Population standard deviation (divides by N, not N - 1).
 *
 * @param {number[]} values
 * @returns {number} 0 for an empty list
 */
function getStdDev(values) {
  const count = values.length;
  if (!count) return 0;

  let total = 0;
  values.forEach((v) => { total += v; });
  const mean = total / count;

  let squaredDeviation = 0;
  values.forEach((v) => { squaredDeviation += (v - mean) ** 2; });

  return Math.sqrt(squaredDeviation / count);
}
| |
/**
 * Per-organization summary of how models score relative to a linear fit of
 * composite score against log10(parameter count).
 *
 * Steps: keep models that have both an avg score and a truthy `params`, fit
 * avg ~ log10(params), compute each model's residual, divide it by the
 * standard deviation of all residuals (1 is used when that deviation is 0)
 * to get `fitZ`, then aggregate by `org`.
 *
 * @param {Object[]} [modelsForRows=getFilteredModels()]
 * @returns {{residualStd: (number|null), rows: Object[]}} rows are sorted by
 *   meanFitZ, highest first; `{ residualStd: null, rows: [] }` when fewer than
 *   two models qualify
 */
function getOrgFitRows(modelsForRows = getFilteredModels()) {
  const sized = getAvgModels(modelsForRows).filter((m) => m.params);
  if (sized.length < 2) return { residualStd: null, rows: [] };

  const models = sized.map((m) => ({ ...m, logParams: Math.log10(m.params) }));
  const fit = linearRegression(models.map((m) => [m.logParams, m.avg]));

  const withFit = models.map((m) => {
    const expected = fit.slope * m.logParams + fit.intercept;
    return { ...m, expected, residual: m.avg - expected };
  });

  const residualStd = getStdDev(withFit.map((m) => m.residual));
  // Avoid dividing by zero when every model sits exactly on the fit line.
  const scale = residualStd || 1;

  // Bucket models by organization key.
  const byOrg = {};
  for (const m of withFit) {
    const bucket = byOrg[m.org] || (byOrg[m.org] = []);
    bucket.push({ ...m, fitZ: m.residual / scale });
  }

  const rows = Object.keys(byOrg).map((orgKey) => {
    const items = byOrg[orgKey];

    let scoreTotal = 0;
    let fitZTotal = 0;
    items.forEach((m) => {
      scoreTotal += m.avg;
      fitZTotal += m.fitZ;
    });

    // Model with the largest residual z-score within this organization.
    const ranked = items.slice();
    ranked.sort((a, b) => b.fitZ - a.fitZ);
    const bestFit = ranked[0];

    return {
      orgKey,
      org: ORGS[orgKey],
      count: items.length,
      avgScore: scoreTotal / items.length,
      meanFitZ: fitZTotal / items.length,
      topModel: bestFit.name,
      topScore: bestFit.avg,
      topFitZ: bestFit.fitZ
    };
  });

  rows.sort((a, b) => b.meanFitZ - a.meanFitZ);
  return { residualStd, rows };
}
| |
/**
 * Compare each organization's position in `rows` with its position in
 * `baselineRows` (both are ranked lists; index 0 is rank 1).
 *
 * The "up" and "down" labels were previously the same mis-encoded string, so
 * the text could not tell the two directions apart. They are now distinct
 * arrows, written as escapes so the source stays ASCII.
 *
 * @param {Object[]} rows current ranking; each row has an `orgKey`
 * @param {Object[]} baselineRows earlier ranking; each row has an `orgKey`
 * @returns {Object<string, {className: string, label: string}>} keyed by
 *   orgKey. className is 'new', 'up', 'down' or 'same'; label is 'new', '-',
 *   or an arrow followed by the number of places moved.
 */
function getOrgRankMovement(rows, baselineRows) {
  const UP_ARROW = '\u2191';   // upwards arrow
  const DOWN_ARROW = '\u2193'; // downwards arrow

  const previousRanks = new Map(baselineRows.map((row, i) => [row.orgKey, i + 1]));

  return rows.reduce((movement, row, i) => {
    const currentRank = i + 1;
    const previousRank = previousRanks.get(row.orgKey);

    if (!previousRank) {
      // Organization absent from the baseline ranking.
      movement[row.orgKey] = { className: 'new', label: 'new' };
      return movement;
    }

    // Positive delta = rank number got smaller = moved up the table.
    const delta = previousRank - currentRank;
    if (delta === 0) {
      movement[row.orgKey] = { className: 'same', label: '-' };
    } else if (delta > 0) {
      movement[row.orgKey] = { className: 'up', label: `${UP_ARROW} ${delta}` };
    } else {
      movement[row.orgKey] = { className: 'down', label: `${DOWN_ARROW} ${Math.abs(delta)}` };
    }
    return movement;
  }, {});
}
| |
| |
/**
 * Inline CSS color for a score, interpolated from rgb(107,107,110) at `min`
 * to rgb(74,222,128) at `max` along a pct^1.5 curve.
 *
 * The position is clamped to [0, 1] before the power curve is applied. The
 * previous order (power first, clamp second) turned any value below `min`
 * into NaN, because a negative base with a fractional exponent is NaN, and
 * produced the invalid declaration `color: rgb(NaN,NaN,NaN)`. In-range values
 * give exactly the same result as before.
 *
 * @param {number|null|undefined} value score to color
 * @param {number} min lower end of the scale
 * @param {number} max upper end of the scale
 * @returns {string} 'color: rgb(r,g,b)', or '' when value is missing or the
 *   scale is degenerate (min === max)
 */
function scoreColor(value, min, max) {
  if (value === null || value === undefined || min === max) return '';

  const position = Math.max(0, Math.min(1, (value - min) / (max - min)));
  const pct = Math.pow(position, 1.5);

  const r = Math.round(107 + (74 - 107) * pct);
  const g = Math.round(107 + (222 - 107) * pct);
  const b = Math.round(110 + (128 - 110) * pct);
  return `color: rgb(${r},${g},${b})`;
}
| |
| |
| |
| |
| |
/**
 * Render the parameter-size filter buttons into #filter-bar, marking the
 * button for the current `activeFilter` with the `active` class.
 *
 * Does nothing when the container is absent, matching the other render
 * functions. The visible label escapes '<' so that e.g. "<100M" is emitted as
 * text rather than relying on the HTML parser's error recovery.
 */
function renderFilters() {
  const bar = document.getElementById('filter-bar');
  if (!bar) return;

  const filters = ['all', '<100M', '<50M', '<10M'];
  bar.innerHTML = filters.map(f => {
    const activeClass = activeFilter === f ? 'active' : '';
    const label = f === 'all' ? 'All' : f.replace(/</g, '&lt;');
    return `
    <button class="filter-btn ${activeClass}" onclick="setFilter('${f}')">${label}</button>
    `;
  }).join('');
}
| |
/**
 * Render one button per METRICS entry into #benchmark-filter-bar; the entry
 * matching `activeBenchmark` gets the `active` class. No-op when the
 * container is absent.
 */
function renderBenchmarkFilters() {
  const benchmarkBar = document.getElementById('benchmark-filter-bar');
  if (!benchmarkBar) return;

  const buttons = [];
  METRICS.forEach((m) => {
    const activeClass = activeBenchmark === m.key ? 'active' : '';
    buttons.push(`
    <button class="filter-btn ${activeClass}" onclick="setBenchmark('${m.key}')">${m.label}</button>
    `);
  });
  benchmarkBar.innerHTML = buttons.join('');
}
| |
/**
 * Render the insight cards into #insight-grid: two count cards (models and
 * organizations in the current filter) followed by one "Best <metric>" card
 * per METRICS entry. No-op when the container is absent.
 */
function renderHighlights() {
  const grid = document.getElementById('insight-grid');
  if (!grid) return;

  const filtered = getFilteredModels();
  const filterMeta = activeFilter === 'all' ? 'All parameter sizes' : `${activeFilter} parameters`;

  // Distinct, truthy organization keys among the filtered models.
  const orgKeys = new Set();
  filtered.forEach((m) => {
    if (m.org) orgKeys.add(m.org);
  });

  const statCard = (label, value, border) => `
    <div class="insight-card" style="border-top-color:${border}">
    <span class="insight-label">${label}</span>
    <span class="insight-value stat">${value}</span>
    <span class="insight-meta">${filterMeta}</span>
    </div>
    `;

  const metricCard = (metric) => {
    const label = `Best ${metric.label}`;
    const winner = topByMetric(metric.key);

    let border = 'var(--border-strong)';
    let valueHtml = '<span class="insight-value">No data</span>';
    let scoreHtml = '';
    let meta = filterMeta;

    if (winner) {
      const org = ORGS[winner.org];
      const href = winner.links?.card;
      // The model name links to its card only when a card URL exists.
      valueHtml = href
        ? `<a class="insight-value" href="${href}" target="_blank">${winner.name}</a>`
        : `<span class="insight-value">${winner.name}</span>`;
      scoreHtml = `<span class="insight-score">${winner.score.toFixed(2)}%</span>`;
      meta = `<a href="${org.url}" target="_blank">${org.name}</a> · ${winner.paramsDisplay}`;
      border = org.chartBorder;
    }

    return `
    <div class="insight-card" style="border-top-color:${border}">
    <span class="insight-label">${label}</span>
    ${valueHtml}
    ${scoreHtml}
    <span class="insight-meta">${meta}</span>
    </div>
    `;
  };

  const cards = [
    statCard('Models tracked', filtered.length, 'var(--accent)'),
    statCard('Orgs represented', orgKeys.size, 'var(--accent-muted)')
  ];
  METRICS.forEach((metric) => cards.push(metricCard(metric)));

  grid.innerHTML = cards.join('');
}
| |
/**
 * Switch the active parameter-size filter and re-render every view that
 * depends on it.
 *
 * @param {string} f filter key, e.g. 'all' or '<50M'
 */
function setFilter(f) {
  activeFilter = f;

  const dependentViews = [
    renderFilters,
    renderHighlights,
    renderTable,
    renderBarChart,
    renderScatter,
    renderOrgLeaderboard
  ];
  for (const render of dependentViews) render();
}
| |
/**
 * Switch the benchmark shown in the charts and re-render the benchmark
 * buttons and both charts.
 *
 * @param {string} metric metric key
 */
function setBenchmark(metric) {
  activeBenchmark = metric;

  const dependentViews = [renderBenchmarkFilters, renderBarChart, renderScatter];
  for (const render of dependentViews) render();
}
| |
| |
| |
| |
| |
/**
 * Render the model leaderboard rows into #leaderboard-body and update the
 * sort indicator in the table header.
 *
 * Each score cell is colored on a min..max scale taken from the currently
 * filtered models, and the best value per column gets the `best` class.
 *
 * Fixes over the previous version:
 *  - mis-encoded characters in the N/A placeholder, the separator between
 *    org and size, and the sort indicators are restored (em dash, middle dot,
 *    up/down triangles), written as escapes so the source stays ASCII;
 *  - a model without `links.card` no longer throws and aborts the whole
 *    table; its name is rendered as plain text instead;
 *  - a missing tbody or header cell is skipped instead of throwing.
 */
function renderTable() {
  const tbody = document.getElementById('leaderboard-body');
  if (!tbody) return;

  const sorted = getSortedModels();
  const allModels = getFilteredModels();

  const NA_CELL = '<span class="td-score na">\u2014</span>';

  // Smallest and largest present value of a list, or nulls when none present.
  const rangeOf = (values) => {
    const present = values.filter(v => v !== null && v !== undefined);
    return present.length
      ? { min: Math.min(...present), max: Math.max(...present) }
      : { min: null, max: null };
  };

  const best = {};
  const mins = {};
  BENCHMARKS.forEach(k => {
    const range = rangeOf(allModels.map(m => m[k]));
    best[k] = range.max;
    mins[k] = range.min;
  });
  const avgRange = rangeOf(allModels.map(getAvg));
  best.avg = avgRange.max;
  mins.avg = avgRange.min;

  // A value counts as "best" when it is within 0.001 of the column maximum.
  const isBest = (val, key) => best[key] !== null && Math.abs(val - best[key]) < 0.001;

  const fmtCell = (val, key) => {
    if (val === null || val === undefined) return NA_CELL;
    const cls = isBest(val, key) ? 'best' : '';
    return `<span class="td-score ${cls}" style="${scoreColor(val, mins[key], best[key])}">${val.toFixed(2)}%</span>`;
  };

  tbody.innerHTML = sorted.map((m, i) => {
    const org = ORGS[m.org];
    const rank = i + 1; // position in the current sort order
    const avg = getAvg(m);

    const cardUrl = m.links?.card;
    const nameHtml = cardUrl ? `<a href="${cardUrl}" target="_blank">${m.name}</a>` : m.name;
    const avgHtml = avg !== null
      ? `<span class="${isBest(avg, 'avg') ? 'best' : ''}" style="${scoreColor(avg, mins.avg, best.avg)}">${avg.toFixed(2)}%</span>`
      : NA_CELL;

    return `
    <tr>
    <td class="td-rank">${rank}</td>
    <td class="td-model">${nameHtml}<span class="model-org"><a href="${org.url}" target="_blank">${org.name}</a> \u00B7 ${m.paramsDisplay}</span></td>
    <td class="td-params">${m.paramsDisplay}</td>
    <td class="td-score">${avgHtml}</td>
    <td class="td-score">${fmtCell(m.hellaswag, 'hellaswag')}</td>
    <td class="td-score">${fmtCell(m.arc, 'arc')}</td>
    <td class="td-score">${fmtCell(m.arcChall, 'arcChall')}</td>
    <td class="td-score">${fmtCell(m.piqa, 'piqa')}</td>
    <td class="td-score">${fmtCell(m.arithmark2, 'arithmark2')}</td>
    </tr>
    `;
  }).join('');

  // Reset every header cell, then mark the one matching the active sort column.
  const headerCells = document.querySelectorAll('thead th');
  headerCells.forEach(th => {
    th.classList.remove('sorted');
    const ind = th.querySelector('.sort-indicator');
    if (ind) ind.textContent = '';
  });

  const colMap = { rank: 0, name: 1, params: 2, avg: 3, hellaswag: 4, arc: 5, arcChall: 6, piqa: 7, arithmark2: 8 };
  const thIdx = colMap[sortCol];
  if (thIdx !== undefined) {
    const th = headerCells[thIdx];
    if (th) {
      th.classList.add('sorted');
      const ind = th.querySelector('.sort-indicator');
      // Up triangle for ascending, down triangle for descending.
      if (ind) ind.textContent = sortAsc ? '\u25B2' : '\u25BC';
    }
  }
}
| |
/**
 * Header click handler: clicking the active column flips the direction,
 * clicking another column selects it in descending order. Re-renders the table.
 *
 * @param {string} col column key
 */
function sortTable(col) {
  const sameColumn = sortCol === col;
  sortAsc = sameColumn ? !sortAsc : false;
  sortCol = col;
  renderTable();
}
| |
| |
| |
| |
| |
/**
 * Draw the horizontal bar chart of the top 12 models for the active benchmark
 * on #barChart, replacing any previous chart. Also updates the sub-heading
 * and the <h3> inside the canvas' parent. No-op when the canvas is absent.
 */
function renderBarChart() {
  const canvas = document.getElementById('barChart');
  if (!canvas) return;
  const ctx = canvas.getContext('2d');

  // Chart.js needs the old instance destroyed before the canvas is reused.
  const previous = chartInstances['bar'];
  if (previous) previous.destroy();

  const metricLabel = getMetricLabel(activeBenchmark, true);
  const sub = document.getElementById('score-chart-sub');
  if (sub) sub.textContent = `Top ${metricLabel.toLowerCase()} scores for the active size filter.`;

  const ranked = [];
  getChartModels().forEach((m) => {
    const score = getMetricValue(m, activeBenchmark);
    if (score !== null && score !== undefined) ranked.push({ ...m, score });
  });
  ranked.sort((a, b) => b.score - a.score);
  const models = ranked.slice(0, 12);

  const heading = canvas.parentElement.querySelector('h3');
  if (models.length === 0) {
    heading.textContent = `Top ${metricLabel} Scores - no models with data`;
    return;
  }
  heading.textContent = `Top ${models.length} ${metricLabel} Scores`;

  const AXIS_COLOR = '#454548';
  const monoFont = () => ({ family: "'Space Mono', monospace", size: 10 });
  const scores = models.map((m) => m.score);

  const dataset = {
    label: metricLabel,
    data: scores,
    backgroundColor: models.map((m) => ORGS[m.org].chartColor.replace('0.70', '0.82')),
    borderColor: models.map((m) => ORGS[m.org].chartBorder),
    borderWidth: 1.5,
    borderRadius: 4,
    borderSkipped: false
  };

  const tooltip = {
    backgroundColor: 'rgba(20,20,22,0.96)',
    borderColor: 'rgba(194,182,255,0.25)',
    borderWidth: 1,
    titleColor: '#e8e8e8',
    bodyColor: '#6b6b6e',
    padding: 10,
    displayColors: false,
    callbacks: {
      // Two tooltip lines: the score and the model's parameter count.
      label: (item) => {
        const model = models[item.dataIndex];
        return [`${metricLabel}: ${item.raw.toFixed(2)}%`, `Params: ${model.paramsDisplay}`];
      }
    }
  };

  const xAxis = {
    min: 0,
    suggestedMax: Math.max(...scores) * 1.08,
    title: {
      display: true,
      text: `${metricLabel} (%)`,
      color: AXIS_COLOR,
      font: monoFont()
    },
    grid: { color: 'rgba(255,255,255,0.03)' },
    ticks: { color: AXIS_COLOR, font: monoFont(), callback: (v) => v + '%', maxTicksLimit: 8 },
    border: { display: false }
  };

  const yAxis = {
    grid: { display: false },
    ticks: { color: '#6b6b6e', font: { family: "'DM Sans', system-ui, sans-serif", size: 11 } },
    border: { display: false }
  };

  chartInstances['bar'] = new Chart(ctx, {
    type: 'bar',
    data: {
      labels: models.map((m) => m.name),
      datasets: [dataset]
    },
    options: {
      indexAxis: 'y',
      responsive: true,
      maintainAspectRatio: true,
      animation: { duration: 0 },
      plugins: {
        legend: { display: false },
        tooltip
      },
      scales: { x: xAxis, y: yAxis }
    }
  });
}
| |
/**
 * Render the organization leaderboard into #org-leaderboard-body.
 *
 * Rows come from getOrgFitRows() for the current filter. Rank movement is
 * computed against the same calculation run on the baseline model list from
 * getOrgMovementBaselineModels(). Also updates the explanatory sub-heading.
 * No-op when the tbody is absent.
 */
function renderOrgLeaderboard() {
  const tbody = document.getElementById('org-leaderboard-body');
  if (!tbody) return;

  const current = getOrgFitRows();
  const baseline = getOrgFitRows(getOrgMovementBaselineModels());
  const movement = getOrgRankMovement(current.rows, baseline.rows);
  const residualStd = current.residualStd;
  const rows = current.rows;

  const sub = document.getElementById('org-leaderboard-sub');
  const filterText = activeFilter === 'all' ? 'all model sizes' : `${activeFilter} models`;
  if (sub) {
    if (residualStd === null) {
      sub.textContent = `Need at least two models with Avg and parameter counts for ${filterText}.`;
    } else {
      sub.textContent = `Average standard deviations above or below the Avg-vs-size fit line across ${filterText}. Movement compares against rankings above the cutoff marker. Residual std dev: ${residualStd.toFixed(2)} pts.`;
    }
  }

  if (rows.length === 0) {
    // Placeholder row when no organization has enough data.
    tbody.innerHTML = `
    <tr>
    <td class="td-rank">-</td>
    <td class="td-model">No organization data</td>
    <td class="td-params">-</td>
    <td class="td-score na">N/A</td>
    <td class="td-score na">N/A</td>
    <td class="td-model"><span class="model-org">Try a broader size filter.</span></td>
    </tr>
    `;
    return;
  }

  const html = [];
  rows.forEach((row, i) => {
    const zClass = row.meanFitZ >= 0 ? 'best' : '';
    const move = movement[row.orgKey] || { className: 'same', label: '-' };
    html.push(`
    <tr>
    <td class="td-rank">${i + 1}</td>
    <td class="td-model"><span class="org-title-line"><a href="${row.org.url}" target="_blank">${row.org.name}</a><span class="rank-move ${move.className}">${move.label}</span></span><span class="model-org">Best residual: ${row.topModel}</span></td>
    <td class="td-params">${row.count}</td>
    <td class="td-score"><span class="${zClass}">${formatSigned(row.meanFitZ)} std</span></td>
    <td class="td-score">${row.avgScore.toFixed(2)}%</td>
    <td class="td-model">${row.topModel}<span class="model-org">${row.topScore.toFixed(2)}% avg · ${formatSigned(row.topFitZ)} std</span></td>
    </tr>
    `);
  });
  tbody.innerHTML = html.join('');
}
| |
/**
 * Render one legend entry per organization in ORGS into #legend-bar, using
 * each organization's `chartBorder` as the dot color.
 *
 * Does nothing when the container is absent. renderScatter() calls this on
 * every re-render, so without the guard a missing legend element would stop
 * the scatter chart from being drawn.
 */
function renderLegend() {
  const bar = document.getElementById('legend-bar');
  if (!bar) return;

  bar.innerHTML = Object.values(ORGS).map(o => `
    <div class="legend-item"><span class="legend-dot" style="background:${o.chartBorder}"></span>${o.name}</div>
    `).join('');
}
| |
/**
 * Ordinary least-squares line through a list of [x, y] pairs.
 *
 * @param {Array<[number, number]>} points
 * @returns {{slope: number, intercept: number}} `{0, 0}` for fewer than two
 *   points; a flat line through the mean of y when all x are (nearly) equal
 */
function linearRegression(points) {
  const n = points.length;
  if (n < 2) return { slope: 0, intercept: 0 };

  const sums = { x: 0, y: 0, xy: 0, xx: 0 };
  for (const [x, y] of points) {
    sums.x += x;
    sums.y += y;
    sums.xy += x * y;
    sums.xx += x * x;
  }

  const denom = n * sums.xx - sums.x * sums.x;
  // No spread in x: the slope is undefined, so fall back to a horizontal line.
  if (Math.abs(denom) < 1e-10) {
    return { slope: 0, intercept: sums.y / n };
  }

  const slope = (n * sums.xy - sums.x * sums.y) / denom;
  const intercept = (sums.y - slope * sums.x) / n;
  return { slope, intercept };
}
| |
/**
 * Draw the scatter chart of the active benchmark against log10(parameter
 * count) on #scatterChart, replacing any previous chart.
 *
 * Three datasets are drawn: the least-squares regression line (filled towards
 * the top of the chart), a horizontal line at the benchmark's entry in
 * RANDOM_BASELINES (when one exists) and the model points. Each point carries
 * `fitZ`, its residual divided by the residual standard deviation. Also
 * re-renders the legend and updates the sub-heading and the <h3> inside the
 * canvas' parent. No-op when the canvas is absent.
 *
 * Fix over the previous version: the x-axis title contained mis-encoded
 * characters in place of the subscript "10"; it now reads Log10 with real
 * subscript digits, written as escapes so the source stays ASCII.
 */
function renderScatter() {
  const canvas = document.getElementById('scatterChart');
  if (!canvas) return;
  const ctx = canvas.getContext('2d');
  if (chartInstances['scatter']) chartInstances['scatter'].destroy();

  const metricLabel = getMetricLabel(activeBenchmark, true);
  const sub = document.getElementById('efficiency-sub');
  if (sub) sub.textContent = `${metricLabel} vs parameter count (log scale). Shaded zone = above regression line. Dotted line = random baseline.`;

  // Only models with a parameter count and a value for the metric can be plotted.
  const models = getChartModels()
    .filter(m => m.params && getMetricValue(m, activeBenchmark) !== null && getMetricValue(m, activeBenchmark) !== undefined)
    .map(m => ({
      x: Math.log10(m.params),
      y: getMetricValue(m, activeBenchmark),
      name: m.name,
      org: m.org,
      params: m.paramsDisplay
    }));

  const heading = canvas.parentElement.querySelector('h3');
  if (models.length < 2) {
    heading.textContent = `${metricLabel} vs Log Parameters - need 2+ models with data`;
    return;
  }
  heading.textContent = `${metricLabel} vs Log Parameters`;

  // Fit the line, then express each residual in standard deviations.
  const { slope, intercept } = linearRegression(models.map(m => [m.x, m.y]));
  const residuals = models.map(m => m.y - (slope * m.x + intercept));
  const residualStd = getStdDev(residuals);
  models.forEach((m, i) => {
    m.fitZ = residuals[i] / (residualStd || 1);
  });

  // The x range is padded by 10% on both sides; the lines span the padded range.
  const xValues = models.map(m => m.x);
  const xMin = Math.min(...xValues);
  const xMax = Math.max(...xValues);
  const pad = (xMax - xMin) * 0.1;
  const xLow = xMin - pad;
  const xHigh = xMax + pad;
  const yAtLow = slope * xLow + intercept;
  const yAtHigh = slope * xHigh + intercept;

  const randomBaseline = RANDOM_BASELINES[activeBenchmark] ?? null;
  const yMaxSource = randomBaseline === null ? models.map(m => m.y) : [...models.map(m => m.y), randomBaseline];
  const yMax = Math.max(...yMaxSource) * 1.08;

  renderLegend();

  const AXIS_COLOR = '#454548';
  const monoFont = () => ({ family: "'Space Mono', monospace", size: 10 });

  chartInstances['scatter'] = new Chart(ctx, {
    type: 'scatter',
    data: {
      datasets: [
        // Regression line, shaded from the line to the top of the chart.
        {
          type: 'line',
          label: 'Regression',
          data: [{ x: xLow, y: yAtLow }, { x: xHigh, y: yAtHigh }],
          borderColor: 'rgba(194, 182, 255, 0.45)',
          borderWidth: 1.5,
          borderDash: [5, 4],
          pointRadius: 0,
          fill: 'end',
          backgroundColor: 'rgba(194, 182, 255, 0.10)',
          order: 0
        },
        // Random-guess baseline; empty when the benchmark has none.
        {
          type: 'line',
          label: 'Random',
          data: randomBaseline === null ? [] : [{ x: xLow, y: randomBaseline }, { x: xHigh, y: randomBaseline }],
          borderColor: 'rgba(107, 107, 110, 0.75)',
          borderWidth: 1.25,
          borderDash: [2, 4],
          pointRadius: 0,
          fill: false,
          order: 1
        },
        // One point per model, colored by organization.
        {
          label: 'Models',
          data: models,
          backgroundColor: models.map(m => ORGS[m.org].chartColor.replace('0.70', '0.90')),
          borderColor: models.map(m => ORGS[m.org].chartBorder),
          borderWidth: 1.5,
          pointRadius: 7,
          pointHoverRadius: 10,
          order: 2
        }
      ]
    },
    options: {
      responsive: true,
      maintainAspectRatio: true,
      animation: { duration: 0 },
      plugins: {
        legend: { display: false },
        tooltip: {
          backgroundColor: 'rgba(20,20,22,0.96)',
          borderColor: 'rgba(194,182,255,0.25)',
          borderWidth: 1,
          titleColor: '#e8e8e8',
          bodyColor: '#6b6b6e',
          padding: 10,
          displayColors: false,
          callbacks: {
            title: (items) => items[0]?.raw?.name || '',
            label: (item) => {
              const d = item.raw;
              return [
                `Params: ${d.params}`,
                `${metricLabel}: ${d.y?.toFixed(2)}%`,
                `Fit residual: ${formatSigned(d.fitZ)} std`
              ];
            }
          }
        }
      },
      scales: {
        x: {
          type: 'linear',
          min: xLow,
          max: xHigh,
          title: {
            display: true,
            // U+2081 U+2080 are the subscript digits 1 and 0.
            text: 'Log\u2081\u2080(Parameters)',
            color: AXIS_COLOR,
            font: monoFont()
          },
          grid: { color: 'rgba(255,255,255,0.03)' },
          ticks: { color: AXIS_COLOR, font: monoFont(), maxTicksLimit: 8 },
          border: { display: false }
        },
        y: {
          // Start a little below the regression line's left end, never below 0.
          min: Math.max(0, (yAtLow - (yMax - yAtLow) * 0.3)),
          max: yMax,
          title: {
            display: true,
            text: `${metricLabel} (%)`,
            color: AXIS_COLOR,
            font: monoFont()
          },
          grid: { color: 'rgba(255,255,255,0.03)' },
          ticks: { color: AXIS_COLOR, font: monoFont(), callback: v => v.toFixed(0) + '%', maxTicksLimit: 8 },
          border: { display: false }
        }
      }
    }
  });
}
| |
| |
| |
| |
| |
// Initial paint: once the DOM is parsed, run every render function in order.
window.addEventListener('DOMContentLoaded', function renderInitialView() {
  const initialRenderers = [
    renderFilters,
    renderBenchmarkFilters,
    renderHighlights,
    renderTable,
    renderBarChart,
    renderLegend,
    renderScatter,
    renderOrgLeaderboard
  ];
  for (const render of initialRenderers) render();
});
| </script> |
| </body> |
| </html> |
|
|