Delete streamlit_app.py
streamlit_app.py  +0 -761
DELETED
@@ -1,761 +0,0 @@
import streamlit as st

st.set_page_config(
    page_title="BDH Sparse Brain",
    page_icon="🐉",
    layout="wide",
    initial_sidebar_state="collapsed",
)

import torch
import torch.nn.functional as F
import numpy as np
import html  # used to escape generated text before embedding it in HTML
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from bdh_core import BDHModel, BDHConfig, TransformerModel

# ──────────────────────────────────────────────────────────────────────────────
# GLOBAL CSS – cinematic dark-lab aesthetic
# ──────────────────────────────────────────────────────────────────────────────
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Outfit:wght@300;400;600;800&display=swap');

*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

html, body, .stApp {
    background: #05080f !important;
    color: #d4dce8;
    font-family: 'Outfit', sans-serif;
}

/* kill streamlit chrome */
#MainMenu, footer, header { visibility: hidden !important; }
.block-container {
    padding: 2rem 2.5rem !important;
    max-width: 1300px !important;
}

/* ── sidebar ── */
section[data-testid="stSidebar"] {
    background: #080c18 !important;
    border-right: 1px solid #131c30;
}
section[data-testid="stSidebar"] * { color: #a8b8cc !important; }
section[data-testid="stSidebar"] h1,
section[data-testid="stSidebar"] h2,
section[data-testid="stSidebar"] h3 { color: #e8734a !important; }

/* ── typography ── */
h1, h2, h3 {
    font-family: 'Outfit', sans-serif !important;
    font-weight: 800 !important;
    color: #f0f4fa !important;
    letter-spacing: -0.02em;
}

/* ── inputs ── */
textarea, .stTextArea textarea {
    background: #0c1220 !important;
    color: #d4dce8 !important;
    border: 1px solid #1e2d45 !important;
    border-radius: 10px !important;
    font-family: 'Space Mono', monospace !important;
    font-size: 0.82rem !important;
    resize: none !important;
}
textarea:focus { border-color: #e8734a !important; outline: none !important; box-shadow: 0 0 0 3px rgba(232,115,74,0.15) !important; }

/* ── sliders ── */
.stSlider [data-baseweb="slider"] { padding: 0.3rem 0; }

/* ── tabs ── */
.stTabs [data-baseweb="tab-list"] {
    background: transparent !important;
    border-bottom: 1px solid #131c30;
    gap: 0;
}
.stTabs [data-baseweb="tab"] {
    background: transparent !important;
    color: #5a7a99 !important;
    font-family: 'Outfit', sans-serif !important;
    font-weight: 600 !important;
    font-size: 0.88rem !important;
    padding: 0.6rem 1.2rem !important;
    border: none !important;
    border-bottom: 2px solid transparent !important;
}
.stTabs [aria-selected="true"] {
    color: #e8734a !important;
    border-bottom: 2px solid #e8734a !important;
    background: transparent !important;
}
.stTabs [data-baseweb="tab-highlight"] { display: none !important; }
.stTabs [data-baseweb="tab-panel"] { padding-top: 1.5rem !important; }

/* ── buttons ── */
.stButton > button {
    background: linear-gradient(135deg, #e8734a, #c94f2a) !important;
    color: white !important;
    border: none !important;
    border-radius: 10px !important;
    font-family: 'Outfit', sans-serif !important;
    font-weight: 600 !important;
    padding: 0.6rem 1.6rem !important;
    letter-spacing: 0.02em;
    transition: opacity 0.2s !important;
}
.stButton > button:hover { opacity: 0.88 !important; }

/* ── custom components ── */
.page-header {
    padding: 2.5rem 0 2rem;
    border-bottom: 1px solid #131c30;
    margin-bottom: 2rem;
}
.page-header .eyebrow {
    font-family: 'Space Mono', monospace;
    font-size: 0.72rem;
    color: #e8734a;
    letter-spacing: 0.18em;
    text-transform: uppercase;
    margin-bottom: 0.5rem;
}
.page-header h1 {
    font-size: 2.8rem !important;
    line-height: 1.0 !important;
    background: linear-gradient(135deg, #f0f4fa 0%, #e8734a 100%);
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent !important;
    margin-bottom: 0.6rem;
}
.page-header .sub {
    color: #5a7a99;
    font-size: 1rem;
    font-weight: 300;
    max-width: 620px;
}

.stat-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 1rem; margin: 1.8rem 0; }
.stat-card {
    background: #080c18;
    border: 1px solid #131c30;
    border-radius: 14px;
    padding: 1.2rem 1rem;
    position: relative;
    overflow: hidden;
}
.stat-card::before {
    content: '';
    position: absolute;
    top: 0; left: 0; right: 0;
    height: 2px;
    background: linear-gradient(90deg, #e8734a, transparent);
}
.stat-card.blue::before { background: linear-gradient(90deg, #3b7dd8, transparent); }
.stat-card .val {
    font-family: 'Space Mono', monospace;
    font-size: 1.9rem;
    font-weight: 700;
    color: #e8734a;
    line-height: 1.1;
}
.stat-card.blue .val { color: #3b7dd8; }
.stat-card .lbl {
    font-size: 0.76rem;
    color: #5a7a99;
    margin-top: 0.4rem;
    font-weight: 400;
    letter-spacing: 0.02em;
}
.stat-card .icon { font-size: 1.1rem; margin-bottom: 0.4rem; }

.insight {
    background: #080c18;
    border-left: 3px solid #e8734a;
    border-radius: 0 10px 10px 0;
    padding: 1rem 1.2rem;
    margin: 0.8rem 0;
    font-size: 0.88rem;
    color: #a8b8cc;
    line-height: 1.6;
}
.insight b { color: #f0f4fa; }

.section-label {
    font-family: 'Space Mono', monospace;
    font-size: 0.68rem;
    color: #e8734a;
    letter-spacing: 0.15em;
    text-transform: uppercase;
    margin-bottom: 0.8rem;
}

.badge {
    display: inline-block;
    padding: 3px 12px;
    border-radius: 999px;
    font-size: 0.75rem;
    font-weight: 600;
    font-family: 'Space Mono', monospace;
    margin-bottom: 0.6rem;
}
.badge-orange { background: rgba(232,115,74,0.15); color: #e8734a; border: 1px solid rgba(232,115,74,0.3); }
.badge-blue { background: rgba(59,125,216,0.15); color: #3b7dd8; border: 1px solid rgba(59,125,216,0.3); }

.output-box {
    background: #080c18;
    border: 1px solid #131c30;
    border-radius: 12px;
    padding: 1rem 1.2rem;
    font-family: 'Space Mono', monospace;
    font-size: 0.78rem;
    color: #a8b8cc;
    min-height: 60px;
    word-break: break-all;
    line-height: 1.6;
}
.loss-tag {
    font-family: 'Space Mono', monospace;
    font-size: 0.8rem;
    color: #5a7a99;
    margin-top: 0.5rem;
}
.loss-tag span { color: #e8734a; }

.divider { border: none; border-top: 1px solid #131c30; margin: 2rem 0; }
</style>
""", unsafe_allow_html=True)

# ── Plot theme constants ───────────────────────────────────────────────────────
BG = "#05080f"
CARD = "#080c18"
GRID = "#131c30"
TICK = "#3a5070"
ORNG = "#e8734a"
BLUE = "#3b7dd8"
TEXT = "#d4dce8"
MUTE = "#5a7a99"

def _ax(fig, axes):
    fig.patch.set_facecolor(BG)
    for ax in (axes if hasattr(axes, '__iter__') else [axes]):
        ax.set_facecolor(CARD)
        ax.tick_params(colors=TICK, labelsize=8)
        for s in ax.spines.values():
            s.set_color(GRID)
        ax.xaxis.label.set_color(MUTE)
        ax.yaxis.label.set_color(MUTE)

# ── Model loading ──────────────────────────────────────────────────────────────
@st.cache_resource(show_spinner=False)
def load_models():
    cfg = BDHConfig(vocab_size=256, n_layer=4, n_head=4, n_embd=128)
    bdh = BDHModel(cfg).eval()
    tf = TransformerModel(cfg).eval()
    return bdh, tf, cfg
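
# Note: st.cache_resource keeps a single (untrained) model pair alive across
# reruns, so every visitor sees the same random initialisation. With this
# config (byte-level vocab of 256, 4 layers, 4 heads, n_embd=128) both models
# are small enough to instantiate on CPU at app start.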

def tokenise(text, max_len=64):
    t = list(text.encode())[:max_len]           # raw UTF-8 bytes are already 0..255
    if len(t) < 2:
        t += [32] * (2 - len(t))                # pad with spaces to a minimum length of 2
    return torch.tensor([t], dtype=torch.long)
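
# A quick sketch of what `tokenise` produces (illustrative only): text becomes
# raw UTF-8 bytes, so the "vocabulary" is just the 256 possible byte values
# and no learned tokenizer is involved.
#   >>> tokenise("Hi")
#   tensor([[ 72, 105]])        # 'H' is byte 72, 'i' is byte 105
#   >>> tokenise("").shape
#   torch.Size([1, 2])          # empty input is padded with two spaces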

# ── Chart builders ─────────────────────────────────────────────────────────────
@st.cache_data(show_spinner=False)
def chart_bar(bdh_vals, tf_vals):
    n = len(bdh_vals)
    x = np.arange(n)
    w = 0.32
    fig, ax = plt.subplots(figsize=(8, 3.4), facecolor=BG)
    b1 = ax.bar(x - w/2, bdh_vals, w, color=ORNG, alpha=0.9, zorder=3, label="BDH (ReLU)")
    b2 = ax.bar(x + w/2, tf_vals, w, color=BLUE, alpha=0.9, zorder=3, label="Transformer (GELU)")
    ax.axhline(5, color=ORNG, ls="--", lw=1.1, alpha=0.45)
    ax.axhline(100, color=BLUE, ls=":", lw=1.1, alpha=0.25)
    ax.set_xticks(x); ax.set_xticklabels([f"L{i}" for i in x], color=TICK)
    ax.set_ylim(0, 115); ax.yaxis.grid(True, color=GRID, zorder=0); ax.set_axisbelow(True)
    ax.set_title("Active Neurons per Layer (%)", color=TEXT, fontsize=10, fontweight="bold", pad=10, fontfamily="monospace")
    _ax(fig, [ax])
    ax.legend(facecolor=CARD, edgecolor=GRID, labelcolor=TEXT, fontsize=8, framealpha=0.9)
    for bar, c in [(b1, ORNG), (b2, BLUE)]:
        for b in bar:
            ax.text(b.get_x()+b.get_width()/2, b.get_height()+1.8,
                    f"{b.get_height():.0f}%", ha="center", va="bottom",
                    color=c, fontsize=7.5, fontweight="bold", fontfamily="monospace")
    fig.tight_layout(pad=1.2)
    return fig

@st.cache_data(show_spinner=False)
def chart_heatmap(data_bytes, title, cmap):
    data = np.frombuffer(data_bytes, dtype=np.float32).reshape(-1, 64)
    fig, ax = plt.subplots(figsize=(7, 2.8), facecolor=BG)
    vmin, vmax = float(np.min(data)), float(np.max(data))
    if np.isclose(vmin, vmax):
        vmax = vmin + 1e-6
    im = ax.imshow(data.T, aspect="auto", cmap=cmap, vmin=vmin, vmax=vmax, interpolation="nearest")
    ax.set_xlabel("Token →", color=MUTE, fontsize=8)
    ax.set_ylabel("Neuron →", color=MUTE, fontsize=8)
    ax.set_title(title, color=TEXT, fontsize=9, fontweight="bold", pad=8, fontfamily="monospace")
    _ax(fig, [ax])
    cb = fig.colorbar(im, ax=ax, fraction=0.022, pad=0.02)
    cb.ax.tick_params(colors=TICK, labelsize=7)
    plt.setp(cb.ax.get_yticklabels(), color=TICK)
    fig.tight_layout(pad=1.2)
    return fig

@st.cache_data(show_spinner=False)
def chart_memory():
    T = np.arange(0, 110_000, 400)
    hs, nh, nl, db = 32, 4, 4, 2
    bdh_m = np.full(len(T), nl*nh*hs**2*db/1e6, dtype=float)
    tf_m = T * 2*nh*hs*db / 1e6
    fig, ax = plt.subplots(figsize=(9, 3.4), facecolor=BG)
    ax.fill_between(T/1000, bdh_m, alpha=0.10, color=ORNG)
    ax.fill_between(T/1000, tf_m, alpha=0.10, color=BLUE)
    ax.plot(T/1000, bdh_m, color=ORNG, lw=2.2, label="BDH – O(1) Hebbian state")
    ax.plot(T/1000, tf_m, color=BLUE, lw=2.2, label="Transformer – O(T) KV-cache")
    ax.axvline(12, color="#e05252", ls="--", lw=1.4)
    ax.text(13.5, tf_m.max()*0.62, "← OOM\n~12k", color="#e05252", fontsize=8, fontweight="bold", fontfamily="monospace")
    ax.annotate("BDH flat\nat 50k+ →", xy=(50, bdh_m[0]), xytext=(60, bdh_m[0]+0.07),
                color=ORNG, fontsize=8, fontweight="bold", fontfamily="monospace",
                arrowprops=dict(arrowstyle="->", color=ORNG, lw=1.2))
    ax.set_xlabel("Sequence length (k tokens)", color=MUTE, fontsize=9)
    ax.set_ylabel("Memory (MB)", color=MUTE, fontsize=9)
    ax.set_title("Memory Scaling: O(1) vs O(T)", color=TEXT, fontsize=10, fontweight="bold", pad=10, fontfamily="monospace")
    _ax(fig, [ax]); ax.yaxis.grid(True, color=GRID); ax.set_axisbelow(True)
    ax.legend(facecolor=CARD, edgecolor=GRID, labelcolor=TEXT, fontsize=9, framealpha=0.9)
    fig.tight_layout(pad=1.2)
    return fig
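
# The arithmetic behind the two curves above, spelled out (same constants as
# chart_memory: hs=head_size=32, nh=n_head=4, nl=n_layer=4, db=2 bytes for fp16):
#   BDH Hebbian state : nl * nh * hs**2 * db = 4*4*1024*2 bytes = 32 KB, for any T
#   Transformer cache : T * 2 * nh * hs * db bytes (K and V, per token, per head)
# e.g. at T = 100_000 tokens the KV-cache is 100_000*2*4*32*2 / 1e6 = 51.2 MB
# and still growing, while the Hebbian state has not moved.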

def chart_hebbian(sigma_list, layer):
    if not sigma_list or layer >= len(sigma_list):
        return None
    sigma = sigma_list[layer]
    H = sigma.shape[0]
    fig, axes = plt.subplots(1, H, figsize=(10, 2.6), facecolor=BG)
    if H == 1:
        axes = [axes]
    for h, ax in enumerate(axes):
        m = sigma[h]; vabs = np.abs(m).max() + 1e-8
        im = ax.imshow(m, cmap="RdBu_r", vmin=-vabs, vmax=vabs, interpolation="nearest")
        ax.set_title(f"Head {h}", color="#fdba74", fontsize=9, fontfamily="monospace")
        ax.set_facecolor(BG)
        ax.tick_params(colors=TICK, labelsize=6)
        for s in ax.spines.values(): s.set_color(GRID)
    fig.suptitle(f"Hebbian Synaptic State σ – Layer {layer}",
                 color=TEXT, fontsize=9, fontweight="bold", fontfamily="monospace")
    fig.tight_layout(pad=1.0)
    return fig
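
# For intuition: a Hebbian state like the one plotted above is typically built
# from an outer-product ("fire together, wire together") update. A minimal
# sketch with decay (an illustration, not necessarily the exact update rule
# implemented inside bdh_core):
def _hebbian_update_sketch(sigma, pre, post, decay=0.99):
    """sigma: (H, hs, hs) state; pre, post: (H, hs) neuron activities."""
    return decay * sigma + torch.einsum("hi,hj->hij", pre, post)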

def chart_topology(bdh_model):
    w = bdh_model.blocks[0].attn.qkv.weight.detach().cpu().numpy()
    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(11, 3.4), facecolor=BG)
    im = ax0.imshow(np.abs(w[:64, :64]), cmap="inferno", interpolation="nearest")
    ax0.set_title("QKV Weight |W| – hub structure", color=TEXT, fontsize=9, fontweight="bold", pad=8, fontfamily="monospace")
    fig.colorbar(im, ax=ax0, fraction=0.04)
    norms = np.linalg.norm(w, axis=0)  # column norms as a rough proxy for a neuron's connection "degree"
    ax1.hist(norms, bins=40, color=ORNG, alpha=0.88, edgecolor=BG)
    ax1.set_xlabel("Column norm (hub-ness)", color=MUTE, fontsize=8)
    ax1.set_ylabel("Count", color=MUTE, fontsize=8)
    ax1.set_title("Hub Degree Distribution\n(heavy tail = scale-free)", color=TEXT, fontsize=9, fontweight="bold", pad=8, fontfamily="monospace")
    ax1.yaxis.grid(True, color=GRID); ax1.set_axisbelow(True)
    _ax(fig, [ax0, ax1]); fig.tight_layout(pad=1.2)
    return fig

def chart_neuron_bar(acts, top_idx):
    top_val = acts[top_idx]
    colors = [ORNG if v > 0 else BLUE for v in top_val]
    fig, ax = plt.subplots(figsize=(8, 2.8), facecolor=BG)
    ax.bar([f"N{n}" for n in top_idx], top_val, color=colors, zorder=3)
    ax.axhline(0, color=GRID, lw=0.8)
    ax.set_title("Top Neuron Activations – BDH (sparse → interpretable)", color=TEXT,
                 fontsize=9, fontweight="bold", pad=8, fontfamily="monospace")
    ax.tick_params(colors=TICK, labelrotation=40, labelsize=8)
    ax.yaxis.grid(True, color=GRID); ax.set_axisbelow(True)
    _ax(fig, [ax]); fig.tight_layout(pad=1.2)
    return fig

# ── Generate text helper ───────────────────────────────────────────────────────
@torch.no_grad()
def generate(model, idx, n=35, temp=1.0, top_k=10, is_bdh=False):
    out = idx.clone()
    for _ in range(n):
        logits = model(out)[0] if is_bdh else model(out)  # BDH's forward returns a pair; take the logits
        logits = logits[:, -1, :] / temp
        v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
        logits[logits < v[:, [-1]]] = float('-inf')
        out = torch.cat([out, torch.multinomial(F.softmax(logits, dim=-1), 1)], dim=1)
    return out
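
# The top-k filter inside `generate`, traced on a toy example (made-up values):
#   logits = [[2.0, 0.5, 1.5, -1.0]], top_k = 2
#   v, _   = torch.topk(logits, 2)          ->  v = [[2.0, 1.5]]
#   logits[logits < v[:, [-1]]] = -inf      ->  [[2.0, -inf, 1.5, -inf]]
# softmax over the masked logits then lets multinomial sample only among the
# two highest-scoring byte tokens.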

# ──────────────────────────────────────────────────────────────────────────────
# MAIN
# ──────────────────────────────────────────────────────────────────────────────
def main():
    bdh_model, tf_model, cfg = load_models()

    # ── Header ──
    st.markdown("""
    <div class="page-header">
      <div class="eyebrow">Post-Transformer Hackathon · Pathway × IIT Ropar</div>
      <h1>🐉 BDH Sparse Brain Visualizer</h1>
      <div class="sub">
        Interactive exploration of sparse neural computation, Hebbian memory &
        interpretable activations in the Dragon Hatchling architecture.
      </div>
    </div>
    """, unsafe_allow_html=True)

    # ── Controls row ──
    col_in, col_layer = st.columns([3, 1])
    with col_in:
        input_text = st.text_area(
            "Input text",
            value="The dragon hatchling thinks with sparse neurons that fire together and wire together.",
            height=90, label_visibility="collapsed"
        )
    with col_layer:
        st.markdown("<div style='height:0.3rem'></div>", unsafe_allow_html=True)
        layer_idx = st.slider("Hebbian layer", 0, cfg.n_layer - 1, 0)
        st.markdown(f"<div style='font-family:Space Mono;font-size:0.7rem;color:{MUTE};margin-top:0.3rem'>layer {layer_idx} selected</div>", unsafe_allow_html=True)

    # ── Run models (cached via session state so reruns don't recompute) ──
    # NOTE: only the first 80 characters key the cache, so edits beyond
    # position 80 will not trigger a recompute.
    tok_key = input_text[:80]
    if "last_tok_key" not in st.session_state or st.session_state.last_tok_key != tok_key:
        tokens = tokenise(input_text)
        T = tokens.shape[1]
        with torch.no_grad():
            bdh_stats = bdh_model.get_activation_stats(tokens)
            tf_stats = tf_model.get_activation_stats(tokens)
            sigma_list = bdh_model.get_hebbian_state(tokens)
            bdh_logits, _ = bdh_model(tokens)
            tf_logits = tf_model(tokens)
            tgt = torch.cat([tokens[:, 1:], tokens[:, -1:]], dim=1)
            bdh_loss = F.cross_entropy(bdh_logits.reshape(-1, cfg.vocab_size), tgt.reshape(-1)).item()
            tf_loss = F.cross_entropy(tf_logits.reshape(-1, cfg.vocab_size), tgt.reshape(-1)).item()
            bdh_out = generate(bdh_model, tokens, is_bdh=True)
            tf_out = generate(tf_model, tokens, is_bdh=False)
        st.session_state.update({
            "last_tok_key": tok_key,
            "bdh_stats": bdh_stats, "tf_stats": tf_stats,
            "sigma_list": sigma_list, "T": T,
            "bdh_loss": bdh_loss, "tf_loss": tf_loss,
            "bdh_text": bytes(bdh_out.squeeze(0).tolist()).decode(errors="replace"),
            "tf_text": bytes(tf_out.squeeze(0).tolist()).decode(errors="replace"),
        })

    ss = st.session_state
    bdh_stats = ss["bdh_stats"]
    tf_stats = ss["tf_stats"]
    sigma_list = ss["sigma_list"]
    T = ss["T"]

    avg_bdh = np.mean([s["frac_active"] for s in bdh_stats]) * 100
    avg_tf = np.mean([s["frac_active"] for s in tf_stats]) * 100
    hebb_kb = (cfg.n_layer * cfg.n_head * cfg.head_size**2 * 2) / 1024
    kv_kb = (T * 2 * cfg.n_head * cfg.head_size * 2) / 1024

    # ── Stat cards ──
    st.markdown(f"""
    <div class="stat-grid">
      <div class="stat-card">
        <div class="icon">🐉</div>
        <div class="val">{avg_bdh:.1f}%</div>
        <div class="lbl">BDH Neurons Active</div>
      </div>
      <div class="stat-card blue">
        <div class="icon">🤖</div>
        <div class="val">{avg_tf:.1f}%</div>
        <div class="lbl">Transformer Neurons Active</div>
      </div>
      <div class="stat-card">
        <div class="icon">⚡</div>
        <div class="val">{hebb_kb:.0f} KB</div>
        <div class="lbl">BDH Memory (constant)</div>
      </div>
      <div class="stat-card blue">
        <div class="icon">📈</div>
        <div class="val">{kv_kb:.0f} KB</div>
        <div class="lbl">Transformer KV-Cache (grows)</div>
      </div>
    </div>
    """, unsafe_allow_html=True)

    st.markdown(f"""
    <div style="text-align:center;padding:0.6rem 0 1.4rem;font-family:'Space Mono',monospace;font-size:0.82rem;color:{MUTE};">
    Processing <span style="color:{TEXT};font-weight:700">{T} tokens</span> ·
    BDH <span style="color:{ORNG};font-weight:700">{avg_bdh:.1f}%</span> active
    vs
    Transformer <span style="color:{BLUE};font-weight:700">{avg_tf:.1f}%</span> active
    · <span style="color:{MUTE}">untrained model – sparsity increases after training</span>
    </div>
    """, unsafe_allow_html=True)

    # ── Output comparison ──
    st.markdown("<hr class='divider'>", unsafe_allow_html=True)
    st.markdown("<div class='section-label'>Model Output Comparison</div>", unsafe_allow_html=True)
    oc1, oc2 = st.columns(2)
    with oc1:
        st.markdown("<div class='badge badge-orange'>🐉 BDH Output</div>", unsafe_allow_html=True)
        # escape generated bytes so a stray '<' cannot break the surrounding HTML
        st.markdown(f"<div class='output-box'>{html.escape(ss['bdh_text'][:300])}</div>", unsafe_allow_html=True)
        st.markdown(f"<div class='loss-tag'>cross-entropy loss: <span>{ss['bdh_loss']:.4f}</span></div>", unsafe_allow_html=True)
    with oc2:
        st.markdown("<div class='badge badge-blue'>🤖 Transformer Output</div>", unsafe_allow_html=True)
        st.markdown(f"<div class='output-box'>{html.escape(ss['tf_text'][:300])}</div>", unsafe_allow_html=True)
        st.markdown(f"<div class='loss-tag'>cross-entropy loss: <span style='color:{BLUE}'>{ss['tf_loss']:.4f}</span></div>", unsafe_allow_html=True)

    st.markdown("<hr class='divider'>", unsafe_allow_html=True)

    # ── Tabs ──
    tab1, tab2, tab3, tab4, tab5 = st.tabs([
        "⚡ Activation Sparsity",
        "🧠 Hebbian Memory",
        "📈 Memory Scaling",
        "🔗 Graph Topology",
        "🔥 Live Training",
    ])

    # ─────────────────────────────────── TAB 1 ───
    with tab1:
        st.markdown("""
        <div class="insight">
        <b>Core BDH insight:</b> BDH uses <b>ReLU</b> activations, which hard-zero every negative value, giving a natural ~5% sparsity.
        Transformers use <b>GELU</b>, which almost never outputs an exact zero, so ~100% of neurons stay active.
        Same input, dramatically different neural behaviour.
        </div>""", unsafe_allow_html=True)

        bdh_vals = tuple(s["frac_active"]*100 for s in bdh_stats)
        tf_vals = tuple(s["frac_active"]*100 for s in tf_stats)
        fig = chart_bar(bdh_vals, tf_vals)
        st.pyplot(fig, use_container_width=True); plt.close(fig)

        st.markdown("<div class='section-label' style='margin-top:1.5rem'>Activation Heatmaps – Layer 0</div>", unsafe_allow_html=True)
        hc1, hc2 = st.columns(2)

        acts_bdh = bdh_stats[0]["activations"]
        acts_tf = tf_stats[0]["activations"]
        data_bdh = acts_bdh[:, :64].astype(np.float32)
        data_tf = acts_tf[:, :64].astype(np.float32)

        with hc1:
            st.markdown("<div class='badge badge-orange'>🐉 BDH – ReLU sparse</div>", unsafe_allow_html=True)
            fig = chart_heatmap(data_bdh.tobytes(),
                                f"BDH L0 – {bdh_stats[0]['frac_active']*100:.1f}% active", "hot")
            st.pyplot(fig, use_container_width=True); plt.close(fig)
        with hc2:
            st.markdown("<div class='badge badge-blue'>🤖 Transformer – GELU dense</div>", unsafe_allow_html=True)
            fig = chart_heatmap(data_tf.tobytes(),
                                f"Transformer L0 – {tf_stats[0]['frac_active']*100:.1f}% active", "Blues")
            st.pyplot(fig, use_container_width=True); plt.close(fig)

        st.markdown("<div class='section-label' style='margin-top:1.5rem'>Per-Layer Summary</div>", unsafe_allow_html=True)
        cols = st.columns(len(bdh_stats))
        for i, (bs, ts) in enumerate(zip(bdh_stats, tf_stats)):
            with cols[i]:
                st.metric(f"Layer {i}",
                          f"BDH {bs['frac_active']*100:.1f}%",
                          delta=f"TF {ts['frac_active']*100:.1f}%")

        st.markdown("<hr class='divider'>", unsafe_allow_html=True)
        st.markdown("<div class='section-label'>Neuron Inspector</div>", unsafe_allow_html=True)
        nc1, nc2 = st.columns(2)
        with nc1:
            l_sel = st.select_slider("Layer", options=list(range(len(bdh_stats))), value=0, key="ni_l")
        with nc2:
            max_tok = bdh_stats[0]["activations"].shape[0] - 1
            t_sel = st.select_slider("Token position", options=list(range(max_tok+1)), value=0, key="ni_t")

        acts = bdh_stats[l_sel]["activations"][t_sel]
        top_idx = np.argsort(np.abs(acts))[-12:]
        toks_list = list(input_text.encode("utf-8"))
        byte_val = toks_list[t_sel] if t_sel < len(toks_list) else 63
        char_repr = chr(byte_val) if 32 <= byte_val < 127 else "·"
        st.markdown(f"""
        <div style="font-family:'Space Mono',monospace;font-size:0.78rem;color:{MUTE};margin-bottom:0.8rem">
        token <span style="color:{TEXT}">{t_sel}</span> →
        byte <span style="color:{ORNG}">{byte_val}</span>
        (<span style="color:{TEXT}">{char_repr!r}</span>)
        · {(acts > 0).sum()} / {len(acts)} neurons firing
        </div>""", unsafe_allow_html=True)
        fig = chart_neuron_bar(acts, top_idx)
        st.pyplot(fig, use_container_width=True); plt.close(fig)
        st.markdown("""
        <div class="insight" style="margin-top:0.8rem">
        Because BDH activates only ~5% of neurons per token, you can point to exactly which neurons matter for each prediction.
        This is <b>built-in interpretability</b>; the dense activations of a transformer make the same inspection far harder.
        </div>""", unsafe_allow_html=True)

    # ─────────────────────────────────── TAB 2 ───
    with tab2:
        st.markdown("""
        <div class="insight">
        <b>"Neurons that fire together, wire together."</b> – Hebb's rule<br><br>
        BDH maintains a fixed-size synaptic state matrix <b>σ</b> that strengthens when neurons co-activate.
        Memory size is <b>constant</b> – O(n_head × head_size²) – regardless of sequence length.
        </div>""", unsafe_allow_html=True)

        fig = chart_hebbian(sigma_list, layer=layer_idx)
        if fig:
            st.pyplot(fig, use_container_width=True); plt.close(fig)

        hb1, hb2 = st.columns(2)
        with hb1:
            st.markdown(f"""
            <div style="font-family:'Space Mono',monospace;font-size:0.8rem;line-height:1.8;color:{MUTE}">
            <span style="color:{TEXT}">Each cell (i,j)</span> = synapse between neuron i and j<br>
            🔴 Red = excitatory connection<br>
            🔵 Blue = inhibitory connection<br>
            ⚪ White = weak / no connection
            </div>""", unsafe_allow_html=True)
        with hb2:
            st.markdown(f"""
            <div style="font-family:'Space Mono',monospace;font-size:0.8rem;line-height:1.8;color:{MUTE}">
            BDH Hebbian state: <span style="color:{ORNG}">{hebb_kb:.0f} KB</span> (fixed forever)<br>
            Transformer at {T} tokens: <span style="color:{BLUE}">{kv_kb:.0f} KB</span><br>
            Transformer at 50k tokens: <span style="color:#e05252">{(50000*2*cfg.n_head*cfg.head_size*2)//1024} KB</span>
            </div>""", unsafe_allow_html=True)

        st.markdown("<div class='section-label' style='margin-top:1.5rem'>All Layers</div>", unsafe_allow_html=True)
        for li in range(len(sigma_list)):
            with st.expander(f"Layer {li}"):
                fig = chart_hebbian(sigma_list, layer=li)
                if fig:
                    st.pyplot(fig, use_container_width=True); plt.close(fig)

    # ─────────────────────────────────── TAB 3 ───
    with tab3:
        st.markdown("""
        <div class="insight">
        Transformer KV-caches grow linearly with every token, eventually exhausting GPU memory.
        BDH's Hebbian state stays <b>constant size forever</b>. Community experiments report BDH running 50k+ tokens
        with flat memory while transformers OOM at ~12k on identical hardware.
        </div>""", unsafe_allow_html=True)

        fig = chart_memory()
        st.pyplot(fig, use_container_width=True); plt.close(fig)

        mc1, mc2, mc3 = st.columns(3)
        for col, v, l in [(mc1, "O(1)", "BDH complexity"), (mc2, "O(T)", "Transformer complexity"), (mc3, "50k+", "Max tokens (BDH)")]:
            with col:
                st.markdown(f"""<div class="stat-card" style="text-align:center">
                <div class="val">{v}</div><div class="lbl">{l}</div></div>""", unsafe_allow_html=True)

        st.markdown(f"""
        <div style="margin-top:1.5rem;font-family:'Space Mono',monospace;font-size:0.8rem;color:{MUTE};line-height:2">
        Applications unlocked:<br>
        Healthcare – full patient history in context ·
        Legal – entire contracts reasoned at once ·
        Research – thousands of papers synthesised ·
        Code – large codebases in one pass
        </div>""", unsafe_allow_html=True)

    # ─────────────────────────────────── TAB 4 ───
    with tab4:
        st.markdown("""
        <div class="insight">
        BDH weight matrices form <b>scale-free networks</b>: a few hub neurons connect broadly (like hubs in biological brains)
        while most connect sparsely. This structure emerges from the ReLU low-rank dynamics and is the architectural
        basis for monosemantic synapses.
        </div>""", unsafe_allow_html=True)

        fig = chart_topology(bdh_model)
        st.pyplot(fig, use_container_width=True); plt.close(fig)

        tc1, tc2 = st.columns(2)
        with tc1:
            st.markdown("""<div class="insight">
            <b>In neuroscience:</b> biological neural connectivity follows power-law distributions with hub nodes.
            BDH replicates this naturally; transformers do not.
            </div>""", unsafe_allow_html=True)
        with tc2:
            st.markdown("""<div class="insight">
            <b>Why it matters:</b> Hub neurons act as concept anchors.
            This is the basis for BDH's monosemantic synapses: neurons that consistently encode
            specific concepts (e.g. "currency synapse", "country synapse").
            </div>""", unsafe_allow_html=True)

    # ─────────────────────────────────── TAB 5 ───
    with tab5:
        st.markdown("""
        <div class="insight">
        Train a tiny BDH and a tiny Transformer from scratch on random sequences.
        Watch BDH's activation rate converge toward ~5% as ReLU neurons learn selectivity,
        while Transformer neurons stay dense throughout training.
        </div>""", unsafe_allow_html=True)

        n_steps = st.slider("Training steps", 50, 300, 150, step=50)

        if st.button("▶ Start Training", type="primary"):
            tcfg = BDHConfig(vocab_size=128, n_layer=2, n_head=4, n_embd=64)
            b_m = BDHModel(tcfg).eval()
            t_m = TransformerModel(tcfg).eval()
            ob = torch.optim.AdamW(b_m.parameters(), lr=3e-4)
            ot = torch.optim.AdamW(t_m.parameters(), lr=3e-4)

            b_log, t_log, b_loss_log, t_loss_log, xs = [], [], [], [], []
            prog = st.progress(0)
            ph = st.empty()

            def batch(V=128, B=2, T=24):
                x = torch.randint(0, V, (B, T))
                return x, torch.cat([x[:, 1:], x[:, :1]], dim=1)
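
            # `batch` defines a toy next-token task: y is x rotated left by one
            # position, so both models learn to predict token t+1 from tokens
            # up to t on random byte sequences.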

            for step in range(n_steps):
                x, y = batch()
                b_m.train()
                lg, _ = b_m(x)
                lb = F.cross_entropy(lg.view(-1, 128), y.view(-1))
                ob.zero_grad(); lb.backward(); ob.step()

                t_m.train()
                lt = F.cross_entropy(t_m(x).view(-1, 128), y.view(-1))
                ot.zero_grad(); lt.backward(); ot.step()

                if step % 10 == 0 or step == n_steps - 1:
                    b_m.eval(); t_m.eval()
                    tx = torch.randint(0, 128, (1, 24))
                    ab = np.mean([s["frac_active"] for s in b_m.get_activation_stats(tx)]) * 100
                    at = np.mean([s["frac_active"] for s in t_m.get_activation_stats(tx)]) * 100
                    b_log.append(ab); t_log.append(at)
                    b_loss_log.append(float(lb)); t_loss_log.append(float(lt))
                    xs.append(step)

                    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 3.8), facecolor=BG)
                    _ax(fig, [ax1, ax2])
                    ax1.plot(xs, b_log, "o-", color=ORNG, lw=2, ms=4, label="BDH (ReLU)")
                    ax1.plot(xs, t_log, "s-", color=BLUE, lw=2, ms=4, label="Transformer (GELU)")
                    ax1.axhline(5, color=ORNG, ls="--", lw=1, alpha=0.5)
                    ax1.axhline(100, color=BLUE, ls=":", lw=1, alpha=0.3)
                    ax1.set_xlabel("Training step", color=MUTE); ax1.set_ylabel("% Active", color=MUTE)
                    ax1.set_title("Activation Rate", color=TEXT, fontweight="bold", fontfamily="monospace")
                    ax1.set_ylim(0, 110); ax1.yaxis.grid(True, color=GRID); ax1.set_axisbelow(True)
                    ax1.legend(facecolor=CARD, edgecolor=GRID, labelcolor=TEXT, fontsize=8)

                    ax2.plot(xs, b_loss_log, "-", color=ORNG, lw=2, label="BDH loss")
                    ax2.plot(xs, t_loss_log, "-", color=BLUE, lw=2, label="Transformer loss")
                    ax2.set_xlabel("Training step", color=MUTE); ax2.set_ylabel("Loss", color=MUTE)
                    ax2.set_title("Training Loss", color=TEXT, fontweight="bold", fontfamily="monospace")
                    ax2.yaxis.grid(True, color=GRID); ax2.set_axisbelow(True)
                    ax2.legend(facecolor=CARD, edgecolor=GRID, labelcolor=TEXT, fontsize=8)

                    fig.tight_layout(pad=1.2)
                    ph.pyplot(fig, use_container_width=True); plt.close(fig)
                prog.progress((step + 1) / n_steps)

            st.success(f"Done – BDH: **{b_log[-1]:.1f}%** active · Transformer: **{t_log[-1]:.1f}%** active")
            st.markdown("""
            <div class="insight" style="margin-top:0.8rem">
            BDH's ReLU neurons learned <b>selectivity</b> during training, firing only for strongly relevant inputs,
            while the Transformer's GELU neurons stayed dense. This selectivity is the foundation of BDH's interpretability.
            </div>""", unsafe_allow_html=True)

    # ── Footer ──
    st.markdown("<hr class='divider'>", unsafe_allow_html=True)
    st.markdown(f"""
    <div style="text-align:center;font-family:'Space Mono',monospace;font-size:0.72rem;color:{MUTE};padding-bottom:1rem">
    Built for the Beyond Transformers Hackathon · Pathway × IIT Ropar E-Summit '26 ·
    <a href="https://arxiv.org/abs/2509.26507" style="color:{ORNG};text-decoration:none">arXiv:2509.26507</a> ·
    <a href="https://github.com/pathwaycom/bdh" style="color:{ORNG};text-decoration:none">github.com/pathwaycom/bdh</a>
    </div>""", unsafe_allow_html=True)


if __name__ == "__main__":
    main()