Premchan369 committed on
Commit
e398782
·
verified ·
1 Parent(s): e221672

Upload benchmark_v4.py

Files changed (1)
  1. benchmark_v4.py +320 -0
benchmark_v4.py ADDED
@@ -0,0 +1,320 @@
+ #!/usr/bin/env python3
+ """
+ Q-TensorFormer v4 — Comprehensive Benchmark Suite
+
+ Compares:
+ 1. Dense Baseline (standard transformer FFN)
+ 2. Dense Small (reduced-width FFN)
+ 3. QKAN v4 (DARUAN activation inside the FFN)
+
+ Metrics:
+ - Parameters, Perplexity, Latency, Energy, Carbon
+
+ Usage:
+     python benchmark_v4.py [--epochs N] [--d-model D] [--n-layers L] [--device DEV] [--output results.json]
+ """
+
+ import argparse
+ import json
+ import math
+ import time
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torch.optim import AdamW
+ from torch.utils.data import DataLoader, TensorDataset
+
+
+ # ─── DARUAN ──────────────────────────────────────────────────────────────
+
+ class DARUAN(nn.Module):
+     """Elementwise activation: a learned skip path plus a sum of scalar-gated
+     SiLU branches. Adds only 3 * n_repeats + 1 scalar parameters."""
+
+     def __init__(self, n_repeats=3):
+         super().__init__()
+         self.n_repeats = n_repeats
+         self.activation = nn.SiLU()
+         # Per-branch input scale/shift, and a per-branch output gain
+         # (post_weights[0] gates the skip path).
+         self.pre_weights = nn.ParameterList([
+             nn.Parameter(torch.ones(1) * 0.1) for _ in range(n_repeats)
+         ])
+         self.pre_biases = nn.ParameterList([
+             nn.Parameter(torch.zeros(1)) for _ in range(n_repeats)
+         ])
+         self.post_weights = nn.ParameterList([
+             nn.Parameter(torch.ones(1) * 0.5) for _ in range(n_repeats + 1)
+         ])
+
+     def forward(self, x):
+         out = self.post_weights[0] * x
+         for r in range(self.n_repeats):
+             z = self.pre_weights[r] * x + self.pre_biases[r]
+             out = out + self.post_weights[r + 1] * self.activation(z)
+         return out
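+
+ # Reading off the forward pass above: with scalar gates w_r, a_r, b_r, each
+ # DARUAN computes, elementwise,
+ #     out(x) = w_0 * x + sum_r w_{r+1} * SiLU(a_r * x + b_r),
+ # i.e. a skip path plus n_repeats scaled/shifted SiLU branches, making it a
+ # drop-in activation with negligible parameter cost.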
+
+
+ # ─── Model Builders ───────────────────────────────────────────────────────
+
+ class TransformerBase(nn.Module):
+     """Shared base with configurable FFN."""
+
+     def __init__(self, vocab_size, d_model=128, n_layers=3, n_heads=4,
+                  max_seq_len=128, dropout=0.1, ffn_type="dense",
+                  qkan_repeats=3):
+         super().__init__()
+         self.d_model = d_model
+         self.ffn_type = ffn_type
+
+         self.embedding = nn.Embedding(vocab_size, d_model)
+         pe = torch.zeros(max_seq_len, d_model)
+         pos = torch.arange(0, max_seq_len).float().unsqueeze(1)
+         div = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000) / d_model))
+         pe[:, 0::2] = torch.sin(pos * div)
+         pe[:, 1::2] = torch.cos(pos * div)
+         self.register_buffer("pos_encoding", pe.unsqueeze(0))
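+         # The buffer above is the standard sinusoidal encoding
+         # (Vaswani et al., 2017):
+         #   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
+         #   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))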
+
+         self.blocks = nn.ModuleList()
+         for _ in range(n_layers):
+             block = nn.ModuleDict({
+                 "ln1": nn.LayerNorm(d_model),
+                 "attn": nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True),
+                 "ln2": nn.LayerNorm(d_model),
+                 "ffn": self._build_ffn(d_model, ffn_type, qkan_repeats),
+                 "dropout": nn.Dropout(dropout),
+             })
+             self.blocks.append(block)
+
+         self.ln_f = nn.LayerNorm(d_model)
+         self.lm_head = nn.Linear(d_model, vocab_size, bias=False)
+         self.lm_head.weight = self.embedding.weight
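+         # Weight tying: lm_head reuses the embedding matrix, saving
+         # vocab_size * d_model parameters (1,280,000 at the defaults here).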
+
+         for name, p in self.named_parameters():
+             if "weight" in name and p.dim() >= 2:
+                 nn.init.xavier_uniform_(p)
+
+     def _build_ffn(self, d_model, ffn_type, qkan_repeats):
+         expanded = d_model * 4
+         if ffn_type == "qkan":
+             return nn.Sequential(
+                 nn.Linear(d_model, expanded),
+                 DARUAN(n_repeats=qkan_repeats),
+                 nn.Linear(expanded, d_model),
+             )
+         elif ffn_type == "dense_small":
+             return nn.Sequential(
+                 nn.Linear(d_model, d_model * 2),
+                 nn.GELU(),
+                 nn.Linear(d_model * 2, d_model),
+             )
+         else:  # dense
+             return nn.Sequential(
+                 nn.Linear(d_model, expanded),
+                 nn.GELU(),
+                 nn.Linear(expanded, d_model),
+             )
+
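+     # Per-layer FFN parameter counts at width d: "dense" is two Linears
+     # (d -> 4d -> d), about 8d^2 + 5d with biases; "dense_small"
+     # (d -> 2d -> d) is about 4d^2 + 3d; "qkan" matches "dense" plus
+     # 3 * qkan_repeats + 1 DARUAN scalars.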
+     def forward(self, input_ids):
+         x = self.embedding(input_ids)
+         x = x + self.pos_encoding[:, :x.size(1), :]
+         for block in self.blocks:
+             r = x
+             xn = block["ln1"](x)
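+             # Note: no attention mask is passed, so attention is bidirectional
+             # (non-causal); combined with the copy-style targets below, the
+             # objective is token reconstruction, not next-token prediction.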
+             ao, _ = block["attn"](xn, xn, xn, need_weights=False)
+             x = r + block["dropout"](ao)
+             r = x
+             fo = block["ffn"](block["ln2"](x))
+             x = r + block["dropout"](fo)
+         return self.lm_head(self.ln_f(x))
+
+     @property
+     def total_params(self):
+         return sum(p.numel() for p in self.parameters())
+
+
+ # ─── Synthetic Data ───────────────────────────────────────────────────────
+
+ def create_synthetic_data(vocab_size=10000, seq_len=128, n_train=5000, n_val=500, n_test=500):
+     """Create synthetic language modeling data for quick benchmarks."""
+     torch.manual_seed(42)
+     train = torch.randint(0, vocab_size, (n_train, seq_len))
+     val = torch.randint(0, vocab_size, (n_val, seq_len))
+     test = torch.randint(0, vocab_size, (n_test, seq_len))
+
+     train_ds = TensorDataset(train, train)
+     val_ds = TensorDataset(val, val)
+     test_ds = TensorDataset(test, test)
+
+     return (
+         DataLoader(train_ds, batch_size=16, shuffle=True),
+         DataLoader(val_ds, batch_size=16),
+         DataLoader(test_ds, batch_size=16),
+     )
+
+
+ # ─── Benchmark Runner ─────────────────────────────────────────────────────
+
+ def benchmark_model(model, train_loader, val_loader, test_loader,
+                     epochs=3, lr=3e-4, device="cpu", label=""):
+     """Train and evaluate a model. Returns metrics dict."""
+     model = model.to(device)
+     optimizer = AdamW(model.parameters(), lr=lr, weight_decay=0.01)
+     pad_id = 0  # id 0 is treated as padding; with uniform random data this drops ~1/vocab_size of tokens from the loss
+
+     best_val_ppl = float("inf")
+     train_times = []
+
+     for epoch in range(epochs):
+         model.train()
+         t0 = time.time()
+         total_loss = 0.0
+         tokens = 0
+
+         for inputs, targets in train_loader:
+             inputs, targets = inputs.to(device), targets.to(device)
+             optimizer.zero_grad()
+             logits = model(inputs)
+             loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1), ignore_index=pad_id)
+             loss.backward()
+             torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
+             optimizer.step()
+             total_loss += loss.item() * inputs.numel()
+             tokens += inputs.numel()
+
+         train_time = time.time() - t0
+         train_times.append(train_time)
+         train_ppl = math.exp(min(total_loss / max(tokens, 1), 20))
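+         # The exponent is clamped at 20 (a perplexity cap of e^20, about
+         # 4.85e8) so early, high-loss batches cannot overflow math.exp.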
+
+         # Validation
+         model.eval()
+         val_loss = 0.0
+         val_tokens = 0
+         with torch.no_grad():
+             for inputs, targets in val_loader:
+                 inputs, targets = inputs.to(device), targets.to(device)
+                 logits = model(inputs)
+                 loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1), ignore_index=pad_id, reduction="sum")
+                 val_loss += loss.item()
+                 val_tokens += inputs.numel()
+
+         val_ppl = math.exp(min(val_loss / max(val_tokens, 1), 20))
+         best_val_ppl = min(best_val_ppl, val_ppl)
+
+         print(f" [{label}] E{epoch+1}: train_ppl={train_ppl:.1f} val_ppl={val_ppl:.1f} time={train_time:.1f}s")
+
+     # Test
+     model.eval()
+     test_loss = 0.0
+     test_tokens = 0
+     latency_samples = []
+
+     with torch.no_grad():
+         for inputs, targets in test_loader:
+             inputs, targets = inputs.to(device), targets.to(device)
+             t0 = time.time()
+             logits = model(inputs)
+             t1 = time.time()
+             latency_samples.append((t1 - t0) * 1000 / inputs.size(0))  # ms per sequence
+             loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1), ignore_index=pad_id, reduction="sum")
+             test_loss += loss.item()
+             test_tokens += inputs.numel()
+
+     test_ppl = math.exp(min(test_loss / max(test_tokens, 1), 20))
+     avg_latency = sum(latency_samples) / len(latency_samples)
+     params = model.total_params
+
+     # Energy estimate
+     flops_per_token = 2 * params
+     energy_uj = flops_per_token * 1.3e-9 * 128  # μJ per 128-token sequence (GPU approximate)
+     carbon_ng = energy_uj * 400 * 1e-6  # ng CO2
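+     # Back-of-envelope model (assumptions, not measurements): a forward pass
+     # costs ~2 FLOPs per parameter per token, at roughly 1.3 fJ/FLOP
+     # (1.3e-9 μJ) on GPU-class hardware, over a 128-token sequence; the
+     # carbon line then applies a fixed grid-intensity constant. Treat both
+     # figures as illustrative scaling estimates rather than measured draw.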
+
+     return {
+         "model": label,
+         "params": params,
+         "test_ppl": round(test_ppl, 2),
+         "best_val_ppl": round(best_val_ppl, 2),
+         "avg_latency_ms": round(avg_latency, 3),
+         "energy_uj": round(energy_uj, 2),
+         "carbon_ng": round(carbon_ng, 4),
+         "avg_train_time_s": round(sum(train_times) / len(train_times), 1),
+         "total_train_time_s": round(sum(train_times), 1),
+         "ffn_type": model.ffn_type,
+         "d_model": model.d_model,
+         "n_layers": len(model.blocks),
+     }
+
+
+ def main():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--epochs", type=int, default=3)
+     parser.add_argument("--d-model", type=int, default=128)
+     parser.add_argument("--n-layers", type=int, default=3)
+     parser.add_argument("--output", type=str, default="benchmark_v4_results.json")
+     parser.add_argument("--device", type=str, default="cpu")
+     args = parser.parse_args()
+
+     print("=" * 60)
+     print("Q-TensorFormer v4 — Benchmark Suite")
+     print(f"Config: d={args.d_model}, layers={args.n_layers}, epochs={args.epochs}")
+     print("=" * 60)
+
+     # Synthetic data
+     vocab = 10000
+     seq_len = 128
+     train_loader, val_loader, test_loader = create_synthetic_data(
+         vocab_size=vocab, seq_len=seq_len,
+         n_train=3000, n_val=300, n_test=300,
+     )
+
+     # Models to compare
+     models = {
+         "dense": TransformerBase(vocab, args.d_model, args.n_layers, 4, seq_len, ffn_type="dense"),
+         "dense_small": TransformerBase(vocab, args.d_model, args.n_layers, 4, seq_len, ffn_type="dense_small"),
+         "qkan_v4": TransformerBase(vocab, args.d_model, args.n_layers, 4, seq_len, ffn_type="qkan", qkan_repeats=3),
+     }
+
+     results = []
+     for name, model in models.items():
+         print(f"\n{'─' * 40}")
+         print(f"Benchmarking: {name}")
+         print(f"Parameters: {model.total_params:,}")
+         print(f"{'─' * 40}")
+
+         result = benchmark_model(
+             model, train_loader, val_loader, test_loader,
+             epochs=args.epochs, device=args.device, label=name,
+         )
+         results.append(result)
+
+     # Summary table
+     print(f"\n{'=' * 80}")
+     print(f"{'Model':<15} {'Params':>10} {'Test PPL':>10} {'Latency':>10} {'Energy':>10} {'CO2':>10}")
+     print(f"{'─' * 80}")
+     for r in results:
+         print(f"{r['model']:<15} {r['params']:>10,} {r['test_ppl']:>10.2f} {r['avg_latency_ms']:>8.2f}ms {r['energy_uj']:>8.2f}μJ {r['carbon_ng']:>8.4f}ng")
+
+     # Compute compression and quality tradeoffs
+     dense = next(r for r in results if r["model"] == "dense")
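+     # Derived metrics, relative to the dense baseline: compression_ratio > 1
+     # means fewer parameters, ppl_delta > 0 means worse perplexity, and
+     # energy_reduction_pct > 0 means a lower (estimated) energy figure.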
+     for r in results:
+         if r["model"] != "dense":
+             r["compression_ratio"] = round(dense["params"] / r["params"], 2)
+             r["ppl_delta"] = round(r["test_ppl"] - dense["test_ppl"], 2)
+             r["energy_reduction_pct"] = round((1 - r["energy_uj"] / dense["energy_uj"]) * 100, 1)
+
+     print(f"\n{'─' * 80}")
+     print("Relative to Dense Baseline:")
+     print(f"{'Model':<15} {'Compression':>12} {'PPL Δ':>10} {'Energy ↓':>10}")
+     print(f"{'─' * 50}")
+     for r in results:
+         if r["model"] != "dense":
+             print(f"{r['model']:<15} {r['compression_ratio']:>9.1f}x {r['ppl_delta']:>+10.2f} {r['energy_reduction_pct']:>8.1f}%")
+
+     # Save
+     with open(args.output, "w") as f:
+         json.dump(results, f, indent=2)
+     print(f"\n✅ Results saved to {args.output}")
+
+     return results
+
+
+ if __name__ == "__main__":
+     main()