AbstractPhil
/

geolip-SVAE

TensorBoard

Model card Files Files and versions

xet

Metrics Training metrics Community

AbstractPhil committed 24 days ago

Commit

526518a

verified ·

1 Parent(s): 396b9e6

Update 111m_proto_1024_v3_geometrically_cv_aligned.py

Browse files

Files changed (1) hide show

111m_proto_1024_v3_geometrically_cv_aligned.py +88 -98

111m_proto_1024_v3_geometrically_cv_aligned.py CHANGED Viewed

@@ -1,16 +1,22 @@
 """
-SVAE — Binding Constant Alignment
-====================================
-Matrix: 1024 × 24
-D=24 → native CV = 0.29154 (the binding constant)
-The 24 singular values ARE a 24-dimensional embedding.
-Each image produces a point in R^24 (its spectrum).
-The distribution of spectra across the batch should have
-CV = 0.29154 — the geometric phase boundary.
-The SVD bottleneck doesn't just compress.
-It produces representations at the binding constant.
 pip install "git+https://github.com/AbstractEyes/geolip-core.git"
 """
@@ -28,11 +34,10 @@ try:
     print("Using geolip-core SVD (Gram + eigh)")
 except ImportError:
     HAS_GEOLIP = False
-    print("geolip-core not found, using torch.svd_lowrank fallback")
-    print('Install: pip install "git+https://github.com/AbstractEyes/geolip-core.git"')
-# ── CM primitives for spectrum geometry ──
 def cayley_menger_vol2(points):
     B, N, D = points.shape
@@ -50,7 +55,6 @@ def cayley_menger_vol2(points):
 def cv_of(emb, n_samples=200):
-    """CV of a set of points. emb: (N, D)."""
     if emb.dim() != 2 or emb.shape[0] < 5:
         return 0.0
     N, D = emb.shape
@@ -64,20 +68,7 @@ def cv_of(emb, n_samples=200):
     return (vols.std() / (vols.mean() + 1e-8)).item()
-def cv_loss(emb, target=0.29154, n_samples=64):
-    """CV loss targeting the binding constant."""
-    N, D = emb.shape
-    if N < 5:
-        return torch.tensor(0.0, device=emb.device, requires_grad=True)
-    pool = min(N, 512)
-    indices = torch.stack([torch.randperm(pool, device=emb.device)[:5] for _ in range(n_samples)])
-    vol2 = cayley_menger_vol2(emb[:pool][indices])
-    valid = vol2 > 1e-20
-    if valid.sum() < 5:
-        return torch.tensor(0.0, device=emb.device, requires_grad=True)
-    vols = vol2[valid].sqrt()
-    cv = vols.std() / (vols.mean() + 1e-8)
-    return (cv - target).pow(2)
 # ── Data ──
@@ -96,48 +87,46 @@ def get_cifar10(batch_size=256):
 # ── SVAE ──
-BINDING_CONSTANT = 0.29154
 class SVAE(nn.Module):
-    def __init__(self, matrix_h=1024, keep_k=24):
         super().__init__()
-        self.matrix_h = matrix_h
-        self.matrix_k = keep_k
-        self.keep_k = keep_k
         self.img_dim = 3 * 32 * 32
-        self.mat_dim = matrix_h * keep_k
-        # Deeper encoder for 1024×24 = 24,576 elements
         self.encoder = nn.Sequential(
-            nn.Linear(self.img_dim, 1024),
             nn.GELU(),
-            nn.Linear(1024, 2048),
             nn.GELU(),
-            nn.Linear(2048, self.mat_dim),
         )
-        # Deeper decoder — symmetric
         self.decoder = nn.Sequential(
-            nn.Linear(self.mat_dim, 2048),
             nn.GELU(),
-            nn.Linear(2048, 1024),
             nn.GELU(),
-            nn.Linear(1024, self.img_dim),
         )
     def encode(self, images):
         B = images.shape[0]
-        M = self.encoder(images.reshape(B, -1)).reshape(B, self.matrix_h, self.matrix_k)
         if HAS_GEOLIP:
             U, S, Vh = geolip_svd(M)
         else:
-            U, S, V = torch.svd_lowrank(M, q=self.keep_k)
             Vh = V.transpose(1, 2)
         return {
             'U': U, 'S': S, 'Vt': Vh,
-            'M': M,
         }
     def decode_from_svd(self, U, S, Vt):
@@ -159,51 +148,44 @@ class SVAE(nn.Module):
 # ── Training ──
-def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
     device = torch.device(device if torch.cuda.is_available() else 'cpu')
     train_loader, test_loader = get_cifar10(batch_size=256)
-    keep_k = 24  # D=24 → binding constant
-    model = SVAE(matrix_h=1024, keep_k=keep_k).to(device)
     opt = torch.optim.Adam(model.parameters(), lr=lr)
     sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)
     total_params = sum(p.numel() for p in model.parameters())
-    print(f"SVAE — Binding Constant Alignment")
-    print(f"  Matrix: ({model.matrix_h}, {model.matrix_k})")
-    print(f"  D=24 → target CV = {BINDING_CONSTANT}")
-    print(f"  SVD: {'geolip-core Gram+eigh' if HAS_GEOLIP else 'torch.svd_lowrank'}")
-    print(f"  Compression: {model.img_dim} → {keep_k} ({model.img_dim // keep_k}:1)")
     print(f"  Params: {total_params:,}")
-    print(f"  Device: {device}")
     print("=" * 85)
-    print(f"{'ep':>3} | {'loss':>7} {'recon':>7} {'cv_l':>7} | "
           f"{'t_recon':>7} | "
-          f"{'S0':>6} {'S23':>6} {'ratio':>6} {'erank':>6} {'spec_cv':>7}")
     print("-" * 85)
     for epoch in range(1, epochs + 1):
         model.train()
-        total_loss, total_recon, n = 0, 0, 0
         for images, labels in train_loader:
             images = images.to(device)
             opt.zero_grad()
             out = model(images)
-            recon_loss = F.mse_loss(out['recon'], images)
-            # CV loss on the SPECTRUM as a D=24 embedding
-            # Each sample's 24 singular values = a point in R^24
-            # The batch of spectra should have CV → 0.29154
-            spectrum_cv_loss = cv_loss(out['svd']['S'], target=BINDING_CONSTANT)
-            loss = recon_loss + cv_weight * spectrum_cv_loss
             loss.backward()
             opt.step()
             total_loss += loss.item() * len(images)
-            total_recon += recon_loss.item() * len(images)
             n += len(images)
         sched.step()
@@ -213,7 +195,7 @@ def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
             test_recon, test_n = 0, 0
             test_S = None
             test_erank = 0
-            test_spec_cv = 0
             nb = 0
             with torch.no_grad():
@@ -224,8 +206,11 @@ def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
                     test_n += len(images)
                     test_erank += model.effective_rank(out['svd']['S']).mean().item()
-                    # Measure actual CV of the batch spectra
-                    test_spec_cv += cv_of(out['svd']['S'])
                     if test_S is None:
                         test_S = out['svd']['S'].mean(0).cpu()
@@ -234,15 +219,16 @@ def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
                     nb += 1
             test_erank /= nb
-            test_spec_cv /= nb
             test_S /= nb
             ratio = (test_S[0] / (test_S[-1] + 1e-8)).item()
-            print(f"{epoch:3d} | {total_loss/n:7.4f} {total_recon/n:7.4f} "
-                  f"{spectrum_cv_loss.item():7.5f} | "
                   f"{test_recon/test_n:7.4f} | "
                   f"{test_S[0]:6.3f} {test_S[-1]:6.3f} {ratio:6.2f} "
-                  f"{test_erank:6.2f} {test_spec_cv:7.4f}")
     # ── Final Analysis ──
     print()
@@ -252,6 +238,7 @@ def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
     model.eval()
     all_S, all_recon_err, all_labels = [], [], []
     with torch.no_grad():
         for images, labels in test_loader:
@@ -263,20 +250,27 @@ def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
                 .mean(dim=(1, 2, 3)).cpu())
             all_labels.append(labels.cpu())
     all_S = torch.cat(all_S)
     all_recon_err = torch.cat(all_recon_err)
     all_labels = torch.cat(all_labels)
     erank = model.effective_rank(all_S)
-    spec_cv = cv_of(all_S)
-    print(f"\n  Bottleneck: {keep_k} singular values (D=24)")
     print(f"  Recon MSE: {all_recon_err.mean():.6f} ± {all_recon_err.std():.6f}")
     print(f"  Effective rank: {erank.mean():.2f} ± {erank.std():.2f}")
-    print(f"  Spectrum CV: {spec_cv:.4f}  (target: {BINDING_CONSTANT})")
-    print(f"  Delta from binding constant: {abs(spec_cv - BINDING_CONSTANT):.4f}")
-    # Singular value profile
     S_mean = all_S.mean(0)
     total_energy = (S_mean ** 2).sum()
     print(f"\n  Singular value profile:")
@@ -286,34 +280,32 @@ def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
         cumulative += e
         pct = cumulative / total_energy * 100
         bar = "█" * int(S_mean[i].item() * 30 / (S_mean[0].item() + 1e-8))
-        print(f"    S[{i:2d}]: {S_mean[i]:8.3f}  cum_energy={pct:5.1f}%  {bar}")
-    # Per-class spectral signatures
     cifar_names = ['plane', 'car', 'bird', 'cat', 'deer',
                    'dog', 'frog', 'horse', 'ship', 'truck']
     print(f"\n  Per-class:")
-    print(f"    {'class':>6}  {'recon':>8}  {'erank':>6}  {'spec_cv':>7}  "
-          f"{'S0':>6}  {'S23':>6}  {'ratio':>6}")
     for c in range(10):
         mask = all_labels == c
         rc = all_recon_err[mask].mean().item()
         er = erank[mask].mean().item()
-        sc = cv_of(all_S[mask])
         s0 = all_S[mask, 0].mean().item()
-        s23 = all_S[mask, -1].mean().item()
-        ratio = s0 / (s23 + 1e-8)
-        print(f"    {cifar_names[c]:>6}  {rc:8.6f}  {er:6.2f}  {sc:7.4f}  "
-              f"{s0:6.3f}  {s23:6.3f}  {ratio:6.2f}")
     # Cross-class spectral variance
     class_S_means = torch.stack([all_S[all_labels == c].mean(0) for c in range(10)])
     s_var = class_S_means.std(0)
-    print(f"\n  Cross-class spectral variance (per component):")
-    for i in range(keep_k):
-        bar = "█" * int(s_var[i].item() * 50 / (s_var.max().item() + 1e-8))
-        print(f"    S[{i:2d}]: var={s_var[i]:.4f}  {bar}")
-    # ── Save reconstruction grid ──
     print(f"\n  Saving reconstruction grid...")
     import matplotlib
     matplotlib.use('Agg')
@@ -338,9 +330,8 @@ def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
         S = out['svd']['S'][selected_idx]
         Vt = out['svd']['Vt'][selected_idx]
-        mode_counts = [1, 4, 8, 16, 24]
-        mode_counts = [m for m in mode_counts if m <= keep_k]
-        mode_counts = list(dict.fromkeys(mode_counts))
         prog_recons = []
         for n_modes in mode_counts:
             r = model.decode_from_svd(U[:, :, :n_modes], S[:, :n_modes], Vt[:, :n_modes, :])
@@ -352,7 +343,6 @@ def train(epochs=50, lr=1e-3, cv_weight=0.1, device='cuda'):
     n_samples = len(selected_idx)
     n_cols = 2 + len(mode_counts)
     fig, axes = plt.subplots(n_samples, n_cols, figsize=(n_cols * 1.5, n_samples * 1.5))
     col_titles = ['Original'] + [f'{m} mode{"s" if m > 1 else ""}' for m in mode_counts] + ['|Error|×5']
     for i in range(n_samples):

 """
+SVAE — Structural Binding Constant
+=====================================
+Matrix (V, 24): V rows in D=24 space.
+At D=24, CV ≈ 0.29154 BY CONSTRUCTION — no loss needed.
+The sweep proved it:
+  V=200,  D=24 → CV=0.2914
+  V=1024, D=24 → CV=0.2916
+  V=1992, D=24 → CV=0.2911
+  V is irrelevant. D determines CV.
+The encoder produces a (V, 24) matrix.
+The rows ARE an embedding: V tokens in D=24 space.
+Their CV is ~0.29 by the dimensional law.
+The SVD decomposes this embedding into its spectral structure.
+The decoder reconstructs from the decomposition.
+No CV loss. Monitor only. The geometry is inherent.
 pip install "git+https://github.com/AbstractEyes/geolip-core.git"
 """
     print("Using geolip-core SVD (Gram + eigh)")
 except ImportError:
     HAS_GEOLIP = False
+    print("geolip-core not found, fallback to torch.svd_lowrank")
+# ── CM for monitoring (not loss) ──
 def cayley_menger_vol2(points):
     B, N, D = points.shape
 def cv_of(emb, n_samples=200):
     if emb.dim() != 2 or emb.shape[0] < 5:
         return 0.0
     N, D = emb.shape
     return (vols.std() / (vols.mean() + 1e-8)).item()
+BINDING_CONSTANT = 0.29154
 # ── Data ──
 # ── SVAE ──
 class SVAE(nn.Module):
+    def __init__(self, matrix_v=48, D=24):
+        """
+        matrix_v: number of rows (vocabulary size of the implicit embedding)
+        D: embedding dimension = number of singular values = 24 for binding constant
+        """
         super().__init__()
+        self.matrix_v = matrix_v  # V — number of embedding rows
+        self.D = D                # D — embedding dimension
         self.img_dim = 3 * 32 * 32
+        self.mat_dim = matrix_v * D
         self.encoder = nn.Sequential(
+            nn.Linear(self.img_dim, 512),
             nn.GELU(),
+            nn.Linear(512, 512),
             nn.GELU(),
+            nn.Linear(512, self.mat_dim),
         )
         self.decoder = nn.Sequential(
+            nn.Linear(self.mat_dim, 512),
             nn.GELU(),
+            nn.Linear(512, 512),
             nn.GELU(),
+            nn.Linear(512, self.img_dim),
         )
     def encode(self, images):
         B = images.shape[0]
+        M = self.encoder(images.reshape(B, -1)).reshape(B, self.matrix_v, self.D)
         if HAS_GEOLIP:
             U, S, Vh = geolip_svd(M)
         else:
+            U, S, V = torch.svd_lowrank(M, q=self.D)
             Vh = V.transpose(1, 2)
         return {
             'U': U, 'S': S, 'Vt': Vh,
+            'M': M,  # the embedding matrix — rows are V points in D=24
         }
     def decode_from_svd(self, U, S, Vt):
 # ── Training ──
+def train(epochs=50, lr=1e-3, device='cuda'):
     device = torch.device(device if torch.cuda.is_available() else 'cpu')
     train_loader, test_loader = get_cifar10(batch_size=256)
+    D = 24
+    V = 48
+    model = SVAE(matrix_v=V, D=D).to(device)
     opt = torch.optim.Adam(model.parameters(), lr=lr)
     sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)
     total_params = sum(p.numel() for p in model.parameters())
+    print(f"SVAE — Structural Binding Constant")
+    print(f"  Matrix: ({V}, {D}) — {V} rows in D={D} space")
+    print(f"  Expected row CV ≈ {BINDING_CONSTANT} (no loss, by construction)")
+    print(f"  SVD: {'geolip-core' if HAS_GEOLIP else 'torch.svd_lowrank'}")
+    print(f"  Compression: {model.img_dim} → {D} ({model.img_dim // D}:1)")
     print(f"  Params: {total_params:,}")
     print("=" * 85)
+    print(f"{'ep':>3} | {'loss':>7} {'recon':>7} | "
           f"{'t_recon':>7} | "
+          f"{'S0':>6} {'SD':>6} {'ratio':>6} {'erank':>6} | "
+          f"{'row_cv':>7} {'Δbc':>7}")
     print("-" * 85)
     for epoch in range(1, epochs + 1):
         model.train()
+        total_loss, n = 0, 0
         for images, labels in train_loader:
             images = images.to(device)
             opt.zero_grad()
             out = model(images)
+            loss = F.mse_loss(out['recon'], images)
             loss.backward()
             opt.step()
             total_loss += loss.item() * len(images)
             n += len(images)
         sched.step()
             test_recon, test_n = 0, 0
             test_S = None
             test_erank = 0
+            row_cvs = []
             nb = 0
             with torch.no_grad():
                     test_n += len(images)
                     test_erank += model.effective_rank(out['svd']['S']).mean().item()
+                    # CV of matrix rows: each M[b] is (V, D) — V points in D=24
+                    # Sample a few to keep it fast
+                    if nb < 5:
+                        for b in range(min(4, len(images))):
+                            row_cvs.append(cv_of(out['svd']['M'][b]))
                     if test_S is None:
                         test_S = out['svd']['S'].mean(0).cpu()
                     nb += 1
             test_erank /= nb
             test_S /= nb
             ratio = (test_S[0] / (test_S[-1] + 1e-8)).item()
+            mean_row_cv = sum(row_cvs) / len(row_cvs) if row_cvs else 0
+            delta_bc = abs(mean_row_cv - BINDING_CONSTANT)
+            print(f"{epoch:3d} | {total_loss/n:7.4f} {total_loss/n:7.4f} | "
                   f"{test_recon/test_n:7.4f} | "
                   f"{test_S[0]:6.3f} {test_S[-1]:6.3f} {ratio:6.2f} "
+                  f"{test_erank:6.2f} | "
+                  f"{mean_row_cv:7.4f} {delta_bc:7.4f}")
     # ── Final Analysis ──
     print()
     model.eval()
     all_S, all_recon_err, all_labels = [], [], []
+    all_row_cvs = []
     with torch.no_grad():
         for images, labels in test_loader:
                 .mean(dim=(1, 2, 3)).cpu())
             all_labels.append(labels.cpu())
+            # Row CV for a sample of images
+            for b in range(min(8, len(images))):
+                all_row_cvs.append(cv_of(out['svd']['M'][b]))
     all_S = torch.cat(all_S)
     all_recon_err = torch.cat(all_recon_err)
     all_labels = torch.cat(all_labels)
     erank = model.effective_rank(all_S)
+    mean_row_cv = sum(all_row_cvs) / len(all_row_cvs)
+    print(f"\n  Architecture: ({V}, {D}) — {V} rows × D={D}")
     print(f"  Recon MSE: {all_recon_err.mean():.6f} ± {all_recon_err.std():.6f}")
     print(f"  Effective rank: {erank.mean():.2f} ± {erank.std():.2f}")
+    print(f"\n  Row CV (matrix rows as D={D} embedding):")
+    print(f"    Measured: {mean_row_cv:.4f}")
+    print(f"    Target:   {BINDING_CONSTANT}")
+    print(f"    Delta:    {abs(mean_row_cv - BINDING_CONSTANT):.4f}")
+    print(f"    {'✓ AT BINDING CONSTANT' if abs(mean_row_cv - BINDING_CONSTANT) < 0.01 else '✗ Not at binding constant'}")
+    # Spectrum profile
     S_mean = all_S.mean(0)
     total_energy = (S_mean ** 2).sum()
     print(f"\n  Singular value profile:")
         cumulative += e
         pct = cumulative / total_energy * 100
         bar = "█" * int(S_mean[i].item() * 30 / (S_mean[0].item() + 1e-8))
+        print(f"    S[{i:2d}]: {S_mean[i]:8.3f}  cum={pct:5.1f}%  {bar}")
+    # Per-class
     cifar_names = ['plane', 'car', 'bird', 'cat', 'deer',
                    'dog', 'frog', 'horse', 'ship', 'truck']
     print(f"\n  Per-class:")
+    print(f"    {'class':>6}  {'recon':>8}  {'erank':>6}  {'S0':>7}  {'SD':>7}  {'ratio':>6}")
     for c in range(10):
         mask = all_labels == c
         rc = all_recon_err[mask].mean().item()
         er = erank[mask].mean().item()
         s0 = all_S[mask, 0].mean().item()
+        sd = all_S[mask, -1].mean().item()
+        ratio = s0 / (sd + 1e-8)
+        print(f"    {cifar_names[c]:>6}  {rc:8.6f}  {er:6.2f}  {s0:7.3f}  {sd:7.3f}  {ratio:6.2f}")
     # Cross-class spectral variance
     class_S_means = torch.stack([all_S[all_labels == c].mean(0) for c in range(10)])
     s_var = class_S_means.std(0)
+    print(f"\n  Cross-class S variance (top 5 most discriminative):")
+    _, top_idx = s_var.topk(5)
+    for idx in top_idx:
+        i = idx.item()
+        print(f"    S[{i:2d}]: var={s_var[i]:.4f}")
+    # ── Reconstruction grid ──
     print(f"\n  Saving reconstruction grid...")
     import matplotlib
     matplotlib.use('Agg')
         S = out['svd']['S'][selected_idx]
         Vt = out['svd']['Vt'][selected_idx]
+        mode_counts = [1, 4, 8, 16, D]
+        mode_counts = list(dict.fromkeys([m for m in mode_counts if m <= D]))
         prog_recons = []
         for n_modes in mode_counts:
             r = model.decode_from_svd(U[:, :, :n_modes], S[:, :n_modes], Vt[:, :n_modes, :])
     n_samples = len(selected_idx)
     n_cols = 2 + len(mode_counts)
     fig, axes = plt.subplots(n_samples, n_cols, figsize=(n_cols * 1.5, n_samples * 1.5))
     col_titles = ['Original'] + [f'{m} mode{"s" if m > 1 else ""}' for m in mode_counts] + ['|Error|×5']
     for i in range(n_samples):