// SPDX-License-Identifier: CC-BY-NC-4.0
// Copyright (c) 2026 Detail Technologies B.V.
//
// In-browser Clear pipeline glue. Section 03b ("ONNX model · Android & Web")
// hosts the form inline; the floating "Try with your audio" button scrolls
// to that section, and dragging any audio file onto the page populates the
// form directly (model loads if it hasn't yet). No drawer — the form lives
// on the page.
//
// Imports the same lib that powers `clear/examples/web/`. To bump the lib
// here, re-copy from there (or wait for the npm package extraction).
import { Clear, SR, encodeWav, decodeToMono } from './lib-clear/clear.js';
import { encodeM4A } from './lib-clear/encode-m4a.js';
// Input file extensions (lowercase, no dot) for which the "Save as .m4a"
// button is shown once an enhanced result exists.
const M4A_INPUT_EXTS = ['m4a', 'mp4', 'mov', 'aac', 'm4b'];
// Base URL the model file URLs below are built from.
const HF = 'https://huggingface.co/detail-co/clear/resolve/main';
// Model file URL per variant name.
const MODEL_URLS = {
studio: `${HF}/clear-studio.onnx`,
natural: `${HF}/clear-natural.onnx`,
};
// True when the page URL has a `wasm` query parameter; forwarded to
// `Clear.create` as `forceWasm`.
const forceWasm = new URLSearchParams(location.search).has('wasm');
// Triangle apex sits at x=12 (not 13) so the path's centroid lands ~1 px
// left of geometric center — that's the offset the eye reads as "centered"
// for a right-pointing play triangle. Pause icon is geometrically centered.
// NOTE(review): both icon literals are empty in this copy of the file, while
// the comment above describes path geometry — confirm the markup was not
// stripped somewhere upstream.
const ICON_PLAY = ``;
const ICON_PAUSE = ``;
// Seconds added to `ctx.currentTime` when scheduling the source nodes, so
// every track is started at the same (slightly future) context time.
const START_LEAD = 0.05;
// Shorthand for `document.getElementById`.
const $ = (id) => document.getElementById(id);
/**
 * Single-line truncate for filenames inside progress messages.
 *
 * Names that already fit are returned untouched. Otherwise, when the name
 * ends in a short extension (dot plus at most five characters), the
 * extension is preserved and the end of the stem is replaced by an ellipsis;
 * in every other case the tail of the name is replaced by an ellipsis.
 *
 * @param {string} name - Filename to shorten; falsy values are returned as-is.
 * @param {number} [max=28] - Maximum length of the result (expected >= 1).
 * @returns {string} The original name or a shortened form no longer than `max`.
 */
function truncateName(name, max = 28) {
  if (!name || name.length <= max) return name;
  const ELLIPSIS = '…';
  // Clamp so a tiny `max` never turns into a negative slice end, which
  // would cut from the END of the string and return an over-long result.
  const headOnly = () => name.slice(0, Math.max(0, max - 1)) + ELLIPSIS;

  const dot = name.lastIndexOf('.');
  if (dot < 0 || name.length - dot > 6) return headOnly();

  // Preserve extension; trim the end of the stem.
  const ext = name.slice(dot);
  const keep = max - ext.length - 1;
  // Not even the ellipsis + extension fits: fall back to plain truncation.
  if (keep < 0) return headOnly();
  return name.slice(0, keep) + ELLIPSIS + ext;
}
/**
 * Write a status message into the meta block's sentence span
 * (`#tryPlayerMetaText`). Used for decode/enhance progress as well as the
 * final result sentence. A falsy message clears the text; a missing element
 * makes this a no-op.
 */
function setStatus(msg) {
  const target = $('tryPlayerMetaText');
  if (!target) return;
  target.textContent = msg || '';
}
// Loaded model session (the value resolved by `Clear.create`) and the
// variant name it was created for. Both are reset together.
let clear = null;
let clearVariant = null;
// Object URL of the latest enhanced WAV; used as the Save link's href.
let enhancedURL = null;
// Object URL slot for an m4a render. In the code visible here it is only
// ever revoked and reset to null, never assigned a URL.
let m4aURL = null;
// Active A/B player (return value of `createABPlayer`), or null.
let player = null;
let lastEnhanced = null; // { samples: Float32Array, sampleRate, stem }
let modelLoading = null; // in-flight load promise, to dedupe concurrent calls
// Per-file state. `currentResults` caches enhanced outputs keyed by variant
// so toggling Studio ↔ Natural after both are generated is instant — only
// the player is rebuilt, no re-enhance.
let currentFile = null; // { name, stem, ext }
let currentRaw = null; // Float32Array @ 48 kHz mono
let currentResults = {}; // { studio?: result, natural?: result }
let processing = false; // guards against concurrent enhance calls
/**
 * Format a duration in seconds as `m:ss`, rounding to the nearest second.
 * Anything that is not a finite number renders as `0:00`.
 */
function fmtTime(sec) {
  if (!Number.isFinite(sec)) return '0:00';
  const rounded = Math.round(sec);
  const minutes = Math.floor(rounded / 60);
  const seconds = String(rounded % 60).padStart(2, '0');
  return [minutes, seconds].join(':');
}
// (No standalone progress bars anymore — the card's controls / play button
// stay grayed via the `.is-empty` class until enhance completes; status text
// carries percentage updates during decode + inference.)
/**
 * Dispose the active player and return the card to its empty state:
 * placeholder text, disabled controls, no Save href, hidden m4a button.
 * Also revokes any outstanding object URLs and forgets the last enhanced
 * result. The card element itself stays rendered.
 */
function teardownPlayer() {
  if (player) {
    player.dispose();
    player = null;
  }

  const card = $('tryPlayer');
  if (card) {
    // Apply `reset` to the element with the given id, if it exists.
    const resetIfPresent = (id, reset) => {
      const el = $(id);
      if (el) reset(el);
    };
    card.classList.add('is-empty');
    resetIfPresent('tryPlayerMetaText', (el) => {
      el.textContent = 'Pick a m4a, aac, or wav from your files to enhance.\nWorks best with files shorter than 10 minutes.';
    });
    resetIfPresent('tryPlayerSaveLink', (el) => { el.removeAttribute('href'); });
    resetIfPresent('tryPlayerFilename', (el) => { el.textContent = 'YOUR AUDIO'; });
    resetIfPresent('tryMixSlider', (el) => { el.disabled = true; });
    resetIfPresent('tryPlayBtn', (el) => { el.disabled = true; });
  }

  if (enhancedURL) {
    URL.revokeObjectURL(enhancedURL);
    enhancedURL = null;
  }
  if (m4aURL) {
    URL.revokeObjectURL(m4aURL);
    m4aURL = null;
  }

  const m4aButton = $('tryDownloadM4a');
  if (m4aButton) {
    m4aButton.style.display = 'none';
    m4aButton.disabled = false;
    m4aButton.textContent = 'Save as .m4a';
  }

  lastEnhanced = null;
}
// Release the ORT session before the page goes away. Otherwise the browser
// keeps the WebGPU device alive across reloads in the same tab and the next
// load runs out of memory acquiring the adapter. `pagehide` is preferred
// over `beforeunload` because it also fires for the back/forward cache, and
// it at least lets the async release get started.
window.addEventListener('pagehide', () => {
  if (!clear) return;
  // Best-effort: nothing useful can be done with a failure at this point.
  clear.dispose().catch(() => {});
  clear = null;
  clearVariant = null;
});
// ── Scroll to the try-browser section ─────────────────────────────────
// The FAB is already a plain anchor link to this section; this is the
// drop-target equivalent — used when a file is dragged in so the visitor
// gets pulled to the populated form.
/** Smooth-scroll the `#try` section to the top of the viewport, if it exists. */
function scrollToSection() {
  document
    .getElementById('try')
    ?.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
// Variant radios: with a file loaded, a change regenerates the result for
// the newly selected variant (or swaps to its cached result). With no file
// loaded, the selection simply applies to the next file pick.
for (const radio of document.querySelectorAll('input[name=tryVariant]')) {
  radio.addEventListener('change', () => onVariantToggle());
}
// "upload another" is an inline link inside the meta sentence; it forwards
// its click to the hidden file input inside the picker label.
$('tryPlayerNewInline')?.addEventListener('click', () => {
  const fileInput = $('tryFileInput');
  fileInput.click();
});
// Save link reflects the current slider mix — clicking builds a new
// Float32Array as `raw * (1-mix) + enhanced * mix`, encodes WAV, and
// downloads. So dragging the slider to 60% and clicking Save writes
// exactly that blend, not the 100% enhanced.
/**
 * Current raw/enhanced mix from the slider, as a fraction in [0, 1]
 * (slider value is a percentage).
 *
 * Returns 1 (fully enhanced) when the slider is missing or its value cannot
 * be parsed as a number, so a NaN never reaches the sample blend and the
 * exported file.
 *
 * @returns {number} Mix weight of the enhanced track, clamped to [0, 1].
 */
function currentMix() {
  const slider = $('tryMixSlider');
  if (!slider) return 1;
  const percent = Number.parseFloat(slider.value);
  if (Number.isNaN(percent)) return 1;
  return Math.max(0, Math.min(1, percent / 100));
}
/**
 * Linear blend of two sample arrays: `raw * (1 - mix) + enhanced * mix`.
 * The output is a new Float32Array as long as the shorter input.
 */
function blendSamples(raw, enhanced, mix) {
  const length = Math.min(raw.length, enhanced.length);
  const blended = new Float32Array(length);
  const rawWeight = 1 - mix;
  let i = 0;
  while (i < length) {
    blended[i] = raw[i] * rawWeight + enhanced[i] * mix;
    i += 1;
  }
  return blended;
}
/**
 * Start a browser download of `blob` under `filename` by clicking a
 * temporary anchor element. The object URL is revoked after 60 seconds.
 */
function triggerDownload(blob, filename) {
  const objectUrl = URL.createObjectURL(blob);
  const anchor = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: filename,
  });
  document.body.appendChild(anchor);
  anchor.click();
  document.body.removeChild(anchor);
  setTimeout(() => URL.revokeObjectURL(objectUrl), 60_000);
}
// Save link click: when a raw + enhanced pair is available, replace the
// default navigation with a WAV download of the blend at the current
// slider mix. Without that state the click is left alone.
$('tryPlayerSaveLink')?.addEventListener('click', (event) => {
  const ready = currentRaw && lastEnhanced && currentFile;
  if (!ready) return;
  event.preventDefault();
  const { samples, sampleRate } = lastEnhanced;
  const blended = blendSamples(currentRaw, samples, currentMix());
  const wav = encodeWav(blended, sampleRate);
  triggerDownload(wav, `${currentFile.stem}.clear.wav`);
});
/**
 * React to a change of the variant radios. Does nothing without a decoded
 * file or while an enhance is running. Otherwise shows the cached result
 * for the selected variant, or enhances the current audio with it.
 */
async function onVariantToggle() {
  if (processing || !currentRaw) return;
  const { value: variant } = document.querySelector('input[name=tryVariant]:checked');
  if (!currentResults[variant]) {
    await enhanceCurrentRaw(variant);
    return;
  }
  rebuildPlayerFromCache(variant);
}
// ── Page-wide drag-and-drop ───────────────────────────────────────────
// Drop any audio/video file anywhere on the page → the page scrolls to the
// form, the model loads (if it hasn't yet), and the file is processed once ready.
// Filename extensions accepted when the MIME type does not identify the file.
const AUDIO_EXT_RE = /\.(wav|mp3|m4a|m4b|mp4|mov|aac|ogg|opus|flac|webm|aiff?)$/i;
/**
 * Decide whether a dropped file should be treated as audio input: either
 * its MIME type is `audio/*` or `video/*`, or its name ends in one of the
 * known media extensions.
 */
function isAudioFile(file) {
  const mime = file.type;
  const mimeSaysMedia = Boolean(mime)
    && ['audio/', 'video/'].some((prefix) => mime.startsWith(prefix));
  return mimeSaysMedia || AUDIO_EXT_RE.test(file.name);
}
/**
 * Hand a File to the hidden file input as if the user had picked it:
 * the input's `files` is replaced with a one-item list and a `change`
 * event is dispatched on it.
 */
function deliverFileToInput(file) {
  const transfer = new DataTransfer();
  transfer.items.add(file);
  const picker = $('tryFileInput');
  picker.files = transfer.files;
  const changeEvent = new Event('change');
  picker.dispatchEvent(changeEvent);
}
// Nesting counter for dragenter/dragleave pairs, so the highlight is only
// removed once the drag has left every element it entered.
let dragDepth = 0;
/** True when the drag event's payload includes files. */
const dragHasFiles = (e) => e.dataTransfer?.types.includes('Files');
/** Toggle the page-wide drop highlight on `<body>`. */
const setDragHighlight = (on) => document.body.classList.toggle('drag-over', on);

document.addEventListener('dragenter', (e) => {
  if (!dragHasFiles(e)) return;
  dragDepth += 1;
  setDragHighlight(true);
});

document.addEventListener('dragleave', () => {
  dragDepth = Math.max(0, dragDepth - 1);
  if (dragDepth === 0) setDragHighlight(false);
});

document.addEventListener('dragover', (e) => {
  // Cancelling dragover is what marks the page as a valid drop target.
  if (dragHasFiles(e)) e.preventDefault();
});

document.addEventListener('drop', (e) => {
  const dropped = e.dataTransfer?.files;
  if (!dropped?.length) return;
  e.preventDefault();
  dragDepth = 0;
  setDragHighlight(false);

  const file = dropped[0];
  scrollToSection();
  if (isAudioFile(file)) {
    // Same flow as clicking Pick file — the change handler loads the model
    // if needed, then decodes + enhances.
    deliverFileToInput(file);
    return;
  }
  setStatus(`Not an audio file: ${truncateName(file.name)}`);
});
// ── M4A render-on-request ─────────────────────────────────────────────
// `Save as .m4a` only renders when the user actually asks for it (AAC
// encode + MP4 mux is heavier than the WAV path), and only after we've
// produced an enhanced result. Mediabunny does both stages on the main
// thread; for the demo's chunk sizes that's fine.
// "Save as .m4a" click: blend raw + enhanced at the current slider mix,
// encode to m4a, and download it. The button is disabled and relabelled
// while encoding; on failure the label reports it and the error is logged.
$('tryDownloadM4a')?.addEventListener('click', async () => {
  if (!currentRaw || !lastEnhanced || !currentFile) return;
  const btn = $('tryDownloadM4a');
  // Snapshot everything that describes THIS render before awaiting: a new
  // file picked while encoding replaces `currentFile` / `lastEnhanced`, and
  // the download must still carry the name of the audio actually encoded.
  const { samples, sampleRate } = lastEnhanced;
  const filename = `${currentFile.stem}.clear.m4a`;
  btn.disabled = true;
  btn.textContent = 'Encoding…';
  try {
    if (m4aURL) { URL.revokeObjectURL(m4aURL); m4aURL = null; }
    const mixed = blendSamples(currentRaw, samples, currentMix());
    const blob = await encodeM4A(mixed, sampleRate);
    triggerDownload(blob, filename);
    btn.textContent = 'Save as .m4a';
  } catch (e) {
    console.error('[try] m4a encode failed:', e);
    btn.textContent = 'm4a render failed';
  } finally {
    btn.disabled = false;
  }
});
// ── Auto-load model on demand ─────────────────────────────────────────
// First file pick triggers the download; switching variant between picks
// reloads. Multiple concurrent calls dedupe via `modelLoading`.
/**
 * Make sure a model session for `variant` is loaded and return it.
 *
 * Reuses the existing session when it already matches. If a load is in
 * flight, waits for it first (ignoring its failure) and reuses its result
 * when that matches. Otherwise disposes any existing session and creates a
 * new one via `Clear.create`, reporting download/compile progress through
 * `setStatus`.
 *
 * @param {string} variant - Variant name; 'natural' is labelled
 *   'clear-natural', anything else 'clear-studio'.
 * @returns {Promise<object>} The session resolved by `Clear.create`.
 *   Rejects with whatever `Clear.create` rejects with.
 */
async function ensureModel(variant) {
// Fast path: the right session is already loaded.
if (clearVariant === variant && clear) return clear;
if (modelLoading) {
// Wait out the in-flight load; its error belongs to whoever started it.
try { await modelLoading; } catch {}
if (clearVariant === variant && clear) return clear;
}
const label = variant === 'natural' ? 'clear-natural' : 'clear-studio';
if (clear) {
// A session for another variant is loaded: release it before creating the
// next one. Dispose failures are ignored (best effort).
try { await clear.dispose(); } catch {}
clear = null; clearVariant = null;
// Give ORT a tick to actually free the GPU device before we request a
// new adapter — otherwise WebGPU init can fail OOM on the next call.
// NOTE(review): requestAnimationFrame callbacks are typically paused in
// background tabs — confirm this wait cannot stall a variant switch there.
await new Promise((r) => requestAnimationFrame(r));
}
// Publish the in-flight promise so concurrent callers can wait on it.
modelLoading = (async () => {
setStatus(`Loading ${label}…`);
const instance = await Clear.create({
variant,
forceWasm,
onDownloadProgress: (loaded, total) => {
// Without a known total there is nothing meaningful to show.
if (!total) return;
const mb = (n) => (n / 1_048_576).toFixed(1);
setStatus(`Loading ${label} · ${mb(loaded)} / ${mb(total)} MB`);
},
onPhase: (phase) => {
if (phase === 'compiling-webgpu') setStatus(`Compiling ${label} for WebGPU…`);
else if (phase === 'compiling-wasm') setStatus(`Compiling ${label} for WASM…`);
},
});
// Only record the session once creation has fully succeeded.
clear = instance;
clearVariant = variant;
return instance;
})();
// Clear the in-flight marker whether the load succeeded or failed.
try { return await modelLoading; }
finally { modelLoading = null; }
}
// ── File pick → decode → enhance → player ─────────────────────────────
// File picked (or delivered via drag-and-drop): reset the card, decode the
// file to mono samples, then enhance with the selected variant.
$('tryFileInput').addEventListener('change', async (e) => {
  const file = e.target.files?.[0];
  if (!file) return;
  teardownPlayer();
  // `picked` doubles as an identity token for this pick: if `currentFile`
  // no longer points at it after the decode, a newer pick has taken over.
  const picked = {
    name: file.name,
    stem: file.name.replace(/\.[^.]+$/, ''),
    ext: file.name.toLowerCase().match(/\.([^.]+)$/)?.[1] ?? '',
  };
  currentFile = picked;
  currentResults = {};
  currentRaw = null;
  setStatus(`Decoding ${truncateName(file.name)}…`);

  let decoded;
  try {
    decoded = await decodeToMono(file);
  } catch (err) {
    // Only report the failure if this is still the file the user cares about.
    if (currentFile === picked) {
      setStatus(`Couldn't decode that file: ${err.message || err}`);
    }
    return;
  }
  // Decodes can finish out of order. Dropping a superseded decode keeps
  // `currentRaw` and `currentFile` describing the same file.
  if (currentFile !== picked) return;
  currentRaw = decoded;

  const variant = document.querySelector('input[name=tryVariant]:checked').value;
  await enhanceCurrentRaw(variant);
});
/**
 * Enhance the currently decoded audio with `variant` and show the result.
 *
 * Only one enhance runs at a time (`processing`). The input is snapshotted
 * up front: if another file is picked while this run is in flight, its
 * result and status messages are discarded instead of being cached and
 * played against the new file's raw audio. Once the stale run has finished,
 * the current file is enhanced, because its own request was turned away by
 * the `processing` guard.
 *
 * @param {string} variant - Variant name to load and enhance with.
 * @returns {Promise<void>} Failures are reported via `setStatus`, not thrown.
 */
async function enhanceCurrentRaw(variant) {
  if (!currentRaw || !currentFile) return;
  if (processing) return;
  processing = true;
  const raw = currentRaw;
  const file = currentFile;
  // True once a newer file pick has replaced (or cleared) the decoded audio.
  const isSuperseded = () => currentRaw !== raw;
  $('tryPlayer')?.classList.add('is-processing');
  try {
    await runEnhancePass(raw, file, variant, isSuperseded);
  } finally {
    processing = false;
    $('tryPlayer')?.classList.remove('is-processing');
  }
  // A newer file finished decoding while we were busy and was skipped by the
  // guard above, so process it now. If it is still decoding (`currentRaw` is
  // null), its own handler will call us when it is ready.
  if (isSuperseded() && currentRaw && currentFile) {
    const checked = document.querySelector('input[name=tryVariant]:checked');
    await enhanceCurrentRaw(checked ? checked.value : variant);
  }
}

/** One load-model → enhance → publish pass over a snapshotted input. */
async function runEnhancePass(raw, file, variant, isSuperseded) {
  try {
    await ensureModel(variant);
  } catch (err) {
    if (!isSuperseded()) setStatus(`Failed to load ${variant}: ${err.message || err}`);
    return;
  }
  // No point running inference for audio nobody is waiting for anymore.
  if (isSuperseded()) return;

  const shortName = truncateName(file.name);
  setStatus(`Enhancing ${shortName}…`);
  const t0 = performance.now();
  let result;
  try {
    result = await clear.enhance(raw, {
      mastering: 'applePodcasts',
      onProgress: (stage, frac) => {
        if (stage === 'inference' && !isSuperseded()) {
          setStatus(`Enhancing ${shortName} · ${Math.round(frac * 100)}%`);
        }
      },
    });
  } catch (err) {
    if (!isSuperseded()) setStatus(`Enhance failed: ${err.message || err}`);
    return;
  }
  // Never cache or play a result that belongs to a previous file.
  if (isSuperseded()) return;

  const processingSec = (performance.now() - t0) / 1000;
  currentResults[variant] = { ...result, processingSec };
  rebuildPlayerFromCache(variant);
}
/**
 * Show the cached result for `variant` in the player.
 *
 * If the live player already holds that variant's track, only the active
 * variant is switched. Otherwise the player is rebuilt with every cached
 * track, and the previous playback position and play state are restored.
 * Does nothing when there is no cached result, decoded audio, or file.
 *
 * @param {string} variant - Key into `currentResults`.
 */
function rebuildPlayerFromCache(variant) {
  const result = currentResults[variant];
  if (!result || !currentRaw || !currentFile) return;

  // Fast path — the variant is already inside the player. Gain-swap only.
  if (player && player.hasVariant(variant)) {
    player.setActiveVariant(variant);
    updateMetaAndSave(variant, result);
    return;
  }

  // Slow path — variant just became available. Rebuild with all cached
  // tracks, restoring playback position + state so the listener doesn't
  // notice (a brief gap during the rebuild itself is unavoidable).
  const lastPos = player ? player.getPosition() : 0;
  const wasPlaying = player ? player.isPlaying : false;
  if (player) { player.dispose(); player = null; }

  const enhancedTracks = {};
  for (const [v, r] of Object.entries(currentResults)) {
    if (r?.audio) enhancedTracks[v] = r.audio;
  }
  player = createABPlayer({
    raw: currentRaw,
    enhancedTracks,
    activeVariant: variant,
    sampleRate: SR,
  });

  $('tryPlayer').classList.remove('is-empty');
  $('tryPlayerFilename').textContent = currentFile.name;
  $('tryMixSlider').disabled = false;
  $('tryPlayBtn').disabled = false;

  if (lastPos > 0) player.seekTo(lastPos);
  if (wasPlaying) {
    // `play()` is async and is deliberately not awaited here. Attach a
    // handler so a failed resume is logged rather than becoming an
    // unhandled rejection.
    player.play().catch((err) => {
      console.error('[try] resuming playback after rebuild failed:', err);
    });
  }
  updateMetaAndSave(variant, result);
}
// Backend name last seen on a live session. Lets the meta sentence still be
// rendered for a cached result when no session is loaded at the moment.
let lastKnownBackend = null;

/**
 * Update the result meta sentence + Save link + m4a button — runs both on
 * fresh-variant rebuilds and on cached-variant gain swaps.
 *
 * @param {string} variant - Variant being shown.
 * @param {object} result - Cached result; reads `audio`, `sampleRate`, and
 *   the fields `buildMetaSentence` uses.
 */
function updateMetaAndSave(variant, result) {
  const wavBlob = encodeWav(result.audio, result.sampleRate);
  if (enhancedURL) URL.revokeObjectURL(enhancedURL);
  enhancedURL = URL.createObjectURL(wavBlob);

  const saveLink = $('tryPlayerSaveLink');
  saveLink.href = enhancedURL;
  saveLink.download = `${currentFile.stem}.clear.wav`;

  if (m4aURL) { URL.revokeObjectURL(m4aURL); m4aURL = null; }
  lastEnhanced = {
    samples: result.audio,
    sampleRate: result.sampleRate,
    stem: `${currentFile.stem}.clear`,
  };

  // The m4a export is only offered for inputs that came from an
  // m4a-family container.
  const m4aBtn = $('tryDownloadM4a');
  if (M4A_INPUT_EXTS.includes(currentFile.ext)) {
    m4aBtn.style.display = 'inline-block';
    m4aBtn.disabled = false;
    m4aBtn.textContent = 'Save as .m4a';
  } else {
    m4aBtn.style.display = 'none';
  }

  // `clear` can be null here: a failed load of another variant disposes the
  // previous session, after which toggling back to a cached variant lands in
  // this function. Reading `clear.backend` unguarded would throw.
  if (clear?.backend) lastKnownBackend = clear.backend;
  setStatus(buildMetaSentence(variant, result, lastKnownBackend));
}
/**
 * Build the two-line result sentence shown under the player, in the
 * curated-card transcript style: audio duration and processing time on the
 * first line, realtime factor and backend on the second. The variant is not
 * mentioned because the toggle pill above already shows it.
 */
function buildMetaSentence(variant, result, backend) {
  const { durationSec, processingSec } = result;

  let durationText;
  if (durationSec < 60) {
    durationText = `${durationSec.toFixed(1)} seconds`;
  } else {
    durationText = `${fmtTime(durationSec)} minutes`;
  }

  const processingText = `${processingSec.toFixed(1)}s`;
  const realtimeFactor = processingSec > 0 ? durationSec / processingSec : 0;
  const backendLabel = backend === 'webgpu' ? 'WebGPU' : 'WASM';

  return [
    `Enhanced ${durationText} in ${processingText}.`,
    `${Math.round(realtimeFactor)}× realtime on ${backendLabel}.`,
  ].join('\n');
}
// ── Sample-aligned A/B player (raw ↔ enhanced via mix slider) ─────────
/**
* Sample-aligned multi-track A/B player. Raw + every cached enhanced
* variant ({ studio, natural, ... }) are scheduled in parallel and routed
* through one GainNode each. Toggling between variants is just a gain
* swap — playback continues without restart. The mix slider crossfades
* raw against whichever enhanced track is currently active.
*/
function createABPlayer({ raw, enhancedTracks, activeVariant, sampleRate }) {
let ctx = null;
let bufRaw = null;
const bufsEnh = {}; // variant → AudioBuffer
let srcRaw = null;
const srcsEnh = {}; // variant → AudioBufferSourceNode
let gainRaw = null;
const gainsEnh = {}; // variant → GainNode
let isPlaying = false;
let playStartCtxTime = 0;
let pausePos = 0;
let mix = 1.0;
let active = activeVariant;
let rafId = null;
let unlocked = false; // iOS Safari: needs a silent buffer kick on first play
const longest = Math.max(
raw.length,
...Object.values(enhancedTracks).map((s) => s.length),
);
const duration = longest / sampleRate;
const playBtn = $('tryPlayBtn');
const scrubber = $('tryScrubber');
const progress = scrubber.querySelector('.progress');
const cur = scrubber.parentElement.querySelector('.cur');
const tot = scrubber.parentElement.querySelector('.tot');
const slider = $('tryMixSlider');
function ensureCtx() {
if (ctx) return;
const Ctor = window.AudioContext || window.webkitAudioContext;
// iOS 14 and older Safari reject the sampleRate constructor arg —
// fall back to a default-rate context. AudioBuffers we make below
// still declare sampleRate=48 kHz; the source node auto-resamples
// to whatever ctx.sampleRate actually is.
try { ctx = new Ctor({ sampleRate }); }
catch { ctx = new Ctor(); }
bufRaw = ctx.createBuffer(1, raw.length, sampleRate);
bufRaw.copyToChannel(raw, 0);
gainRaw = ctx.createGain();
gainRaw.connect(ctx.destination);
for (const [v, samples] of Object.entries(enhancedTracks)) {
bufsEnh[v] = ctx.createBuffer(1, samples.length, sampleRate);
bufsEnh[v].copyToChannel(samples, 0);
gainsEnh[v] = ctx.createGain();
gainsEnh[v].connect(ctx.destination);
}
applyMix();
}
/** iOS Safari unlock — fires a 1-sample silent buffer on first play so
* subsequent AudioBufferSourceNode.start() calls actually produce sound.
* No-op on desktop. Must run inside the user-gesture call stack. */
function unlockIOSAudio() {
if (unlocked || !ctx) return;
try {
const src = ctx.createBufferSource();
src.buffer = ctx.createBuffer(1, 1, ctx.sampleRate);
src.connect(ctx.destination);
src.start(0);
unlocked = true;
} catch {}
}
function applyMix() {
if (!ctx) return;
gainRaw.gain.value = 1 - mix;
for (const v of Object.keys(gainsEnh)) {
gainsEnh[v].gain.value = (v === active) ? mix : 0;
}
}
function startSources(offset) {
const startAt = ctx.currentTime + START_LEAD;
srcRaw = ctx.createBufferSource();
srcRaw.buffer = bufRaw;
srcRaw.connect(gainRaw);
srcRaw.start(startAt, offset);
for (const v of Object.keys(bufsEnh)) {
const s = ctx.createBufferSource();
s.buffer = bufsEnh[v];
s.connect(gainsEnh[v]);
s.start(startAt, offset);
srcsEnh[v] = s;
}
srcRaw.onended = () => {
if (isPlaying) {
isPlaying = false;
pausePos = 0;
playBtn.dataset.state = 'paused';
playBtn.innerHTML = ICON_PLAY;
stopRaf();
}
};
playStartCtxTime = startAt;
isPlaying = true;
pausePos = offset;
startRaf();
}
function stopSources() {
for (const s of [srcRaw, ...Object.values(srcsEnh)]) {
if (!s) continue;
try { s.onended = null; s.stop(); } catch {}
try { s.disconnect(); } catch {}
}
srcRaw = null;
for (const v of Object.keys(srcsEnh)) delete srcsEnh[v];
isPlaying = false;
stopRaf();
}
function currentPos() {
if (isPlaying) {
const elapsed = pausePos + (ctx.currentTime - playStartCtxTime);
return Math.min(duration, Math.max(0, elapsed));
}
return pausePos;
}
function syncTimes() {
const p = currentPos();
cur.textContent = fmtTime(p);
tot.textContent = fmtTime(duration);
progress.style.width = `${Math.min(100, (p / duration) * 100)}%`;
}
function startRaf() {
if (rafId) return;
const tick = () => { syncTimes(); if (isPlaying) rafId = requestAnimationFrame(tick); };
rafId = requestAnimationFrame(tick);
}
function stopRaf() {
if (rafId) { cancelAnimationFrame(rafId); rafId = null; }
syncTimes();
}
async function play() {
ensureCtx();
// Fire the iOS unlock SYNCHRONOUSLY inside the user gesture stack —
// before any await — so Safari registers it as a user-initiated
// playback. Calling ctx.resume() and starting the silent source
// both have to happen within the same gesture-induced call.
unlockIOSAudio();
if (ctx.state !== 'running') {
const resumed = ctx.resume();
// Some Safaris return undefined here; guard before awaiting.
if (resumed && typeof resumed.then === 'function') await resumed;
}
startSources(pausePos);
}
function pause() {
const pos = currentPos();
stopSources();
pausePos = Math.min(duration, pos);
}
function seekTo(t) {
const wasPlaying = isPlaying;
if (wasPlaying) stopSources();
pausePos = Math.max(0, Math.min(duration - 0.01, t));
if (wasPlaying) startSources(pausePos);
else syncTimes();
}
function paintSlider() {
slider.style.setProperty('--p', `${slider.value}%`);
}
const onPlayClick = async () => {
if (!isPlaying) {
await play();
playBtn.dataset.state = 'playing';
playBtn.innerHTML = ICON_PAUSE;
} else {
pause();
playBtn.dataset.state = 'paused';
playBtn.innerHTML = ICON_PLAY;
}
};
const onScrubClick = (e) => {
const rect = scrubber.getBoundingClientRect();
const pct = (e.clientX - rect.left) / rect.width;
seekTo(pct * duration);
};
const onSlider = () => { mix = slider.value / 100; applyMix(); paintSlider(); };
const mixEnds = document.querySelectorAll('#tryPlayer .mix-end');
const mixEndHandlers = [];
mixEnds.forEach((btn) => {
const h = () => {
slider.value = Number(btn.dataset.mix);
mix = slider.value / 100;
applyMix(); paintSlider();
};
btn.addEventListener('click', h);
mixEndHandlers.push([btn, h]);
});
playBtn.dataset.state = 'paused';
playBtn.innerHTML = ICON_PLAY;
slider.value = 100; mix = 1.0;
paintSlider(); syncTimes();
playBtn.addEventListener('click', onPlayClick);
scrubber.addEventListener('click', onScrubClick);
slider.addEventListener('input', onSlider);
return {
get isPlaying() { return isPlaying; },
getPosition: currentPos,
hasVariant(v) { return v in enhancedTracks; },
/** Swap which enhanced track is heard — seamless gain change, no restart. */
setActiveVariant(v) {
if (!(v in enhancedTracks)) return false;
active = v;
applyMix();
return true;
},
async play() {
if (isPlaying) return;
await play();
playBtn.dataset.state = 'playing';
playBtn.innerHTML = ICON_PAUSE;
},
seekTo,
pauseIfPlaying() {
if (isPlaying) {
pause();
playBtn.dataset.state = 'paused';
playBtn.innerHTML = ICON_PLAY;
}
},
dispose() {
stopSources();
playBtn.removeEventListener('click', onPlayClick);
scrubber.removeEventListener('click', onScrubClick);
slider.removeEventListener('input', onSlider);
for (const [btn, h] of mixEndHandlers) btn.removeEventListener('click', h);
if (ctx) ctx.close();
},
};
}