// SPDX-License-Identifier: CC-BY-NC-4.0
// Copyright (c) 2026 Detail Technologies B.V.
//
// In-browser Clear pipeline glue. Section 03b ("ONNX model · Android & Web")
// hosts the form inline; the floating "Try with your audio" button scrolls
// to that section, and dragging any audio file onto the page populates the
// form directly (model loads if it hasn't yet). No drawer — the form lives
// on the page.
//
// Imports the same lib that powers `clear/examples/web/`. To bump the lib
// here, re-copy from there (or wait for the npm package extraction).

import { Clear, SR, encodeWav, decodeToMono } from './lib-clear/clear.js';
import { encodeM4A } from './lib-clear/encode-m4a.js';

// Input extensions for which the "Save as .m4a" button is offered.
const M4A_INPUT_EXTS = ['m4a', 'mp4', 'mov', 'aac', 'm4b'];
const HF = 'https://huggingface.co/detail-co/clear/resolve/main';
const MODEL_URLS = {
  studio: `${HF}/clear-studio.onnx`,
  natural: `${HF}/clear-natural.onnx`,
};
// `?wasm` in the page URL is forwarded to Clear.create() as `forceWasm`.
const forceWasm = new URLSearchParams(location.search).has('wasm');

// Triangle apex sits at x=12 (not 13) so the path's centroid lands ~1 px
// left of geometric center — that's the offset the eye reads as "centered"
// for a right-pointing play triangle. Pause icon is geometrically centered.
// NOTE(review): both icon strings are empty in this copy of the file — the
// inline SVG markup looks like it was stripped; confirm against the original.
const ICON_PLAY = ``;
const ICON_PAUSE = ``;
// Seconds added to ctx.currentTime when scheduling the audio sources.
const START_LEAD = 0.05;

/** Shorthand for document.getElementById. */
const $ = (id) => document.getElementById(id);

/**
 * Shorten a filename to a single line for use inside progress messages.
 *
 * @param {string} name - Filename to shorten; falsy values are returned as-is.
 * @param {number} [max=28] - Names no longer than this are returned untouched.
 * @returns {string} The name, cut at the end when there is no extension (or
 *   the dot sits more than 6 characters from the end), otherwise with the
 *   stem trimmed and the extension kept.
 */
function truncateName(name, max = 28) {
  const fits = !name || name.length <= max;
  if (fits) return name;

  const dotIndex = name.lastIndexOf('.');
  const hasShortExt = dotIndex >= 0 && name.length - dotIndex <= 6;
  if (!hasShortExt) return `${name.slice(0, max - 1)}…`;

  // Keep the extension; the ellipsis replaces the tail of the stem.
  const suffix = name.slice(dotIndex);
  const budget = max - suffix.length - 1;
  return `${name.slice(0, dotIndex).slice(0, budget)}…${suffix}`;
}

/**
 * Write a message into the sentence span of the meta block
 * (`#tryPlayerMetaText`). Used for decode/enhance progress and for the
 * final result sentence. Falsy messages clear the text.
 *
 * @param {string} [msg]
 */
function setStatus(msg) {
  const target = $('tryPlayerMetaText');
  if (!target) return;
  target.textContent = msg || '';
}

// ── Module state ──────────────────────────────────────────────────────
let clear = null;          // loaded Clear instance, or null
let clearVariant = null;   // variant name `clear` was created with
let enhancedURL = null;    // object URL backing the Save link
let m4aURL = null;         // object URL for an m4a render, if any
let player = null;         // active A/B player, or null
let lastEnhanced = null;   // { samples: Float32Array, sampleRate, stem }
let modelLoading = null;   // in-flight load promise, to dedupe concurrent calls

// Per-file state. `currentResults` caches enhanced outputs keyed by variant
// so toggling Studio ↔ Natural after both are generated is instant — only
// the player is rebuilt, no re-enhance.
let currentFile = null;    // { name, stem, ext }
let currentRaw = null;     // Float32Array @ 48 kHz mono
let currentResults = {};   // { studio?: result, natural?: result }
let processing = false;    // guards against concurrent enhance calls

/**
 * Format seconds as `m:ss`.
 *
 * @param {number} sec - Seconds; non-finite values render as '0:00'.
 * @returns {string}
 */
function fmtTime(sec) {
  if (!Number.isFinite(sec)) return '0:00';
  const rounded = Math.round(sec);
  const minutes = Math.floor(rounded / 60);
  const seconds = String(rounded % 60).padStart(2, '0');
  return `${minutes}:${seconds}`;
}

// (No standalone progress bars anymore — the card's controls / play button
// stay grayed via the `.is-empty` class until enhance completes; status text
// carries percentage updates during decode + inference.)

/**
 * Dispose the current player and put the card back into its empty state:
 * placeholder text, disabled controls, revoked object URLs, hidden m4a
 * button, and no `lastEnhanced`.
 */
function teardownPlayer() {
  player?.dispose();
  player = null;

  // Card stays rendered; just clear out the populated state.
  const card = $('tryPlayer');
  if (card) {
    card.classList.add('is-empty');

    const meta = $('tryPlayerMetaText');
    if (meta) {
      meta.textContent = 'Pick a m4a, aac, or wav from your files to enhance.\nWorks best with files shorter than 10 minutes.';
    }

    $('tryPlayerSaveLink')?.removeAttribute('href');

    const filenameLabel = $('tryPlayerFilename');
    if (filenameLabel) filenameLabel.textContent = 'YOUR AUDIO';

    const mixSlider = $('tryMixSlider');
    if (mixSlider) mixSlider.disabled = true;

    const playButton = $('tryPlayBtn');
    if (playButton) playButton.disabled = true;
  }

  if (enhancedURL) {
    URL.revokeObjectURL(enhancedURL);
    enhancedURL = null;
  }
  if (m4aURL) {
    URL.revokeObjectURL(m4aURL);
    m4aURL = null;
  }

  const m4aButton = $('tryDownloadM4a');
  if (m4aButton) {
    m4aButton.style.display = 'none';
    m4aButton.disabled = false;
    m4aButton.textContent = 'Save as .m4a';
  }

  lastEnhanced = null;
}

// Release the ORT session before the page unloads. Without this, the
// browser holds onto the WebGPU device across reloads in the same tab
// and the next page load hits OOM trying to acquire the adapter. `pagehide`
// is more reliable than `beforeunload` (fires for back/forward cache too)
// and lets us at least kick off the async release before the page goes.
window.addEventListener('pagehide', () => {
  if (!clear) return;
  clear.dispose().catch(() => {});
  clear = null;
  clearVariant = null;
});

// ── Scroll to the try-browser section ─────────────────────────────────
// The floating button already takes the visitor to this section by itself;
// this is the drop-target equivalent — used when a file is dragged in so the
// visitor gets pulled to the populated form.
/** Smooth-scroll the `#try` section to the top of the viewport, if present. */
function scrollToSection() {
  document.getElementById('try')?.scrollIntoView({ behavior: 'smooth', block: 'start' });
}

// Variant toggle: if a file is loaded, regenerate (or swap to cached
// result) for the new variant. Otherwise the toggle just marks intent
// for the next file pick.
for (const radio of document.querySelectorAll('input[name=tryVariant]')) {
  radio.addEventListener('change', () => onVariantToggle());
}

// "upload another" — inline link inside the meta sentence. Forwards
// the click to the hidden file input that lives inside the picker label.
$('tryPlayerNewInline')?.addEventListener('click', () => {
  $('tryFileInput').click();
});

// Save link reflects the current slider mix — clicking builds a new
// Float32Array as `raw * (1-mix) + enhanced * mix`, encodes WAV, and
// downloads. So dragging the slider to 60% and clicking Save writes
// exactly that blend, not the 100% enhanced.

/**
 * Current mix-slider position as a fraction.
 *
 * @returns {number} Slider value / 100, clamped to [0, 1]; 1 when the
 *   slider element is not in the DOM.
 */
function currentMix() {
  const mixSlider = $('tryMixSlider');
  if (!mixSlider) return 1;
  const fraction = parseFloat(mixSlider.value) / 100;
  return Math.max(0, Math.min(1, fraction));
}

/**
 * Linear blend of two sample arrays.
 *
 * @param {ArrayLike<number>} raw - Original samples.
 * @param {ArrayLike<number>} enhanced - Enhanced samples.
 * @param {number} mix - Weight of `enhanced`; `raw` gets `1 - mix`.
 * @returns {Float32Array} Blend, as long as the shorter of the two inputs.
 */
function blendSamples(raw, enhanced, mix) {
  const length = Math.min(raw.length, enhanced.length);
  const rawWeight = 1 - mix;
  const blended = new Float32Array(length);
  for (let i = 0; i < length; i += 1) {
    blended[i] = raw[i] * rawWeight + enhanced[i] * mix;
  }
  return blended;
}

/**
 * Download a blob by clicking a temporary anchor element.
 *
 * @param {Blob} blob - Content to save.
 * @param {string} filename - Suggested filename for the download.
 */
function triggerDownload(blob, filename) {
  const url = URL.createObjectURL(blob);
  const anchor = Object.assign(document.createElement('a'), {
    href: url,
    download: filename,
  });
  document.body.appendChild(anchor);
  anchor.click();
  anchor.remove();
  // Revoke the object URL a minute later rather than right after the click.
  setTimeout(() => URL.revokeObjectURL(url), 60_000);
}

$('tryPlayerSaveLink')?.addEventListener('click', (event) => {
  const ready = currentRaw && lastEnhanced && currentFile;
  if (!ready) return;
  // Replace the link's default navigation with a download of the current blend.
  event.preventDefault();
  const { samples, sampleRate } = lastEnhanced;
  const blended = blendSamples(currentRaw, samples, currentMix());
  triggerDownload(encodeWav(blended, sampleRate), `${currentFile.stem}.clear.wav`);
});

/**
 * React to the Studio/Natural radio change: swap to the cached result for
 * the newly checked variant, or enhance the loaded audio with it. Does
 * nothing when no audio is loaded or an enhance is already running.
 */
async function onVariantToggle() {
  if (processing || !currentRaw) return;
  const { value: variant } = document.querySelector('input[name=tryVariant]:checked');
  if (!currentResults[variant]) {
    await enhanceCurrentRaw(variant);
    return;
  }
  rebuildPlayerFromCache(variant);
}

// ── Page-wide drag-and-drop ───────────────────────────────────────────
// Drop any audio/video file anywhere on the page → the page scrolls to the
// try section, the model loads (if it hasn't yet), and the file processes
// once ready.
const AUDIO_EXT_RE = /\.(wav|mp3|m4a|m4b|mp4|mov|aac|ogg|opus|flac|webm|aiff?)$/i;

/**
 * Decide whether a dropped file should be treated as audio.
 *
 * @param {File} file
 * @returns {boolean} True for an `audio/*` or `video/*` MIME type, or when
 *   the filename extension matches AUDIO_EXT_RE.
 */
function isAudioFile(file) {
  const { type, name } = file;
  const isMediaType = Boolean(type) && ['audio/', 'video/'].some((prefix) => type.startsWith(prefix));
  return isMediaType || AUDIO_EXT_RE.test(name);
}

/**
 * Put `file` into the `#tryFileInput` element and fire its `change` event,
 * so a drop goes through the same handler as a manual pick.
 *
 * @param {File} file
 */
function deliverFileToInput(file) {
  const transfer = new DataTransfer();
  transfer.items.add(file);
  const fileInput = $('tryFileInput');
  fileInput.files = transfer.files;
  fileInput.dispatchEvent(new Event('change'));
}

// dragenter/dragleave fire once per element the pointer crosses, so a depth
// counter decides when the drag has left the page.
let dragDepth = 0;

/** True when the drag event's payload lists 'Files' among its types. */
const carriesFiles = (event) => Boolean(event.dataTransfer?.types.includes('Files'));

document.addEventListener('dragenter', (event) => {
  if (!carriesFiles(event)) return;
  dragDepth += 1;
  document.body.classList.add('drag-over');
});

document.addEventListener('dragleave', () => {
  dragDepth = Math.max(0, dragDepth - 1);
  if (dragDepth === 0) document.body.classList.remove('drag-over');
});

document.addEventListener('dragover', (event) => {
  // preventDefault on dragover is what allows the later drop.
  if (carriesFiles(event)) event.preventDefault();
});

document.addEventListener('drop', (event) => {
  const droppedFiles = event.dataTransfer?.files;
  if (!droppedFiles?.length) return;
  event.preventDefault();
  dragDepth = 0;
  document.body.classList.remove('drag-over');

  // Only the first dropped file is used.
  const [first] = droppedFiles;
  scrollToSection();
  if (!isAudioFile(first)) {
    setStatus(`Not an audio file: ${truncateName(first.name)}`);
    return;
  }
  // Same flow as clicking Pick file — the change handler auto-loads the
  // model if it isn't yet, then decodes + enhances.
  deliverFileToInput(first);
});

// ── M4A render-on-request ─────────────────────────────────────────────
// `Save as .m4a` only renders when the user actually asks for it (AAC
// encode + MP4 mux is heavier than the WAV path), and only after we've
// produced an enhanced result. Mediabunny does both stages on the main
// thread; for the demo's chunk sizes that's fine.
$('tryDownloadM4a')?.addEventListener('click', async () => {
  if (!currentRaw || !lastEnhanced || !currentFile) return;
  const btn = $('tryDownloadM4a');
  // Snapshot what the download needs before awaiting: a file picked while
  // the encode runs replaces `currentFile` and clears `lastEnhanced`.
  const { stem } = currentFile;
  const { samples, sampleRate } = lastEnhanced;
  btn.disabled = true;
  btn.textContent = 'Encoding…';
  try {
    if (m4aURL) {
      URL.revokeObjectURL(m4aURL);
      m4aURL = null;
    }
    const mixed = blendSamples(currentRaw, samples, currentMix());
    const blob = await encodeM4A(mixed, sampleRate);
    triggerDownload(blob, `${stem}.clear.m4a`);
    btn.textContent = 'Save as .m4a';
  } catch (e) {
    console.error('[try] m4a encode failed:', e);
    btn.textContent = 'm4a render failed';
  } finally {
    btn.disabled = false;
  }
});

// ── Auto-load model on demand ─────────────────────────────────────────
// First file pick triggers the download; switching variant between picks
// reloads. Multiple concurrent calls dedupe via `modelLoading`.

/**
 * Return a Clear instance for `variant`, creating it (and disposing an
 * instance of a different variant) when necessary.
 *
 * @param {string} variant - 'natural' selects the natural model label; any
 *   other value is labelled as the studio model in status messages.
 * @returns {Promise<object>} The instance, also stored in `clear`.
 * @throws Whatever `Clear.create` rejects with. Note that by then the
 *   previous instance has already been disposed and `clear` is null.
 */
async function ensureModel(variant) {
  if (clearVariant === variant && clear) return clear;
  if (modelLoading) {
    try {
      await modelLoading;
    } catch {
      // The caller that started that load reports its failure; we only
      // needed to wait for it to settle.
    }
    if (clearVariant === variant && clear) return clear;
  }

  const label = variant === 'natural' ? 'clear-natural' : 'clear-studio';
  if (clear) {
    try {
      await clear.dispose();
    } catch {
      // Best-effort release; a failed dispose must not block the reload.
    }
    clear = null;
    clearVariant = null;
    // Give ORT a tick to actually free the GPU device before we request a
    // new adapter — otherwise WebGPU init can fail OOM on the next call.
    await new Promise((r) => requestAnimationFrame(r));
  }

  modelLoading = (async () => {
    setStatus(`Loading ${label}…`);
    const instance = await Clear.create({
      variant,
      forceWasm,
      onDownloadProgress: (loaded, total) => {
        if (!total) return;
        const mb = (n) => (n / 1_048_576).toFixed(1);
        setStatus(`Loading ${label} · ${mb(loaded)} / ${mb(total)} MB`);
      },
      onPhase: (phase) => {
        if (phase === 'compiling-webgpu') setStatus(`Compiling ${label} for WebGPU…`);
        else if (phase === 'compiling-wasm') setStatus(`Compiling ${label} for WASM…`);
      },
    });
    clear = instance;
    clearVariant = variant;
    return instance;
  })();

  try {
    return await modelLoading;
  } finally {
    modelLoading = null;
  }
}

// ── File pick → decode → enhance → player ─────────────────────────────
$('tryFileInput').addEventListener('change', async (e) => {
  const file = e.target.files?.[0];
  if (!file) return;

  teardownPlayer();
  // `picked` doubles as an identity token: if `currentFile` no longer points
  // at it after the decode, a newer pick has taken over and this one must
  // not touch shared state.
  const picked = {
    name: file.name,
    stem: file.name.replace(/\.[^.]+$/, ''),
    ext: file.name.toLowerCase().match(/\.([^.]+)$/)?.[1] ?? '',
  };
  currentFile = picked;
  currentResults = {};
  currentRaw = null;

  setStatus(`Decoding ${truncateName(file.name)}…`);
  let decoded;
  try {
    decoded = await decodeToMono(file);
  } catch (err) {
    if (currentFile === picked) setStatus(`Couldn't decode that file: ${err.message || err}`);
    return;
  }
  if (currentFile !== picked) return;
  currentRaw = decoded;

  const variant = document.querySelector('input[name=tryVariant]:checked').value;
  await enhanceCurrentRaw(variant);
});

// Variant of the most recent enhance request that arrived while another
// enhance was running; replayed once the running one finishes.
let pendingVariant = null;

/**
 * Enhance the currently loaded audio with `variant`, cache the result and
 * show it in the player.
 *
 * Only one enhance runs at a time. A request arriving mid-run (a new file
 * picked while the previous one is still enhancing) is remembered and run
 * afterwards, and the superseded run's output is discarded instead of being
 * attached to the new file.
 *
 * @param {string} variant - Model variant to enhance with.
 * @returns {Promise<void>}
 */
async function enhanceCurrentRaw(variant) {
  if (!currentRaw || !currentFile) return;
  if (processing) {
    pendingVariant = variant;
    return;
  }
  processing = true;
  pendingVariant = null;
  const raw = currentRaw;
  const file = currentFile;
  $('tryPlayer')?.classList.add('is-processing');
  try {
    await runEnhance(variant, raw, file);
  } finally {
    processing = false;
    $('tryPlayer')?.classList.remove('is-processing');
  }

  // Replay a request that was dropped while we were busy, unless the result
  // it asked for is already cached.
  const next = pendingVariant;
  pendingVariant = null;
  if (next && currentRaw && !currentResults[next]) await enhanceCurrentRaw(next);
}

/**
 * One enhance pass over `raw`. Publishes the result only when `raw` is still
 * the loaded audio; failures are reported through the status line, not
 * thrown.
 *
 * @param {string} variant - Model variant to load and run.
 * @param {Float32Array} raw - Samples that were current when the run began.
 * @param {{name: string}} file - File descriptor that was current then.
 */
async function runEnhance(variant, raw, file) {
  // `currentRaw` is replaced (first by null, then by new samples) whenever
  // another file is picked, so identity tells us whether this run is stale.
  const isStale = () => currentRaw !== raw;

  let model;
  try {
    model = await ensureModel(variant);
  } catch (err) {
    if (!isStale()) setStatus(`Failed to load ${variant}: ${err.message || err}`);
    return;
  }
  if (isStale()) return;

  const shortName = truncateName(file.name);
  setStatus(`Enhancing ${shortName}…`);
  const t0 = performance.now();
  let result;
  try {
    result = await model.enhance(raw, {
      mastering: 'applePodcasts',
      onProgress: (stage, frac) => {
        if (stage === 'inference' && !isStale()) {
          setStatus(`Enhancing ${shortName} · ${Math.round(frac * 100)}%`);
        }
      },
    });
  } catch (err) {
    if (!isStale()) setStatus(`Enhance failed: ${err.message || err}`);
    return;
  }
  if (isStale()) return;

  const processingSec = (performance.now() - t0) / 1000;
  // Store the backend with the result so the meta sentence can still be
  // built later, when `clear` may have been disposed or failed to reload.
  currentResults[variant] = { ...result, processingSec, backend: model.backend };
  rebuildPlayerFromCache(variant);
}

/**
 * Show the cached result for `variant` in the player. Does nothing when
 * there is no cached result for it or no file is loaded.
 *
 * @param {string} variant
 */
function rebuildPlayerFromCache(variant) {
  const result = currentResults[variant];
  if (!result || !currentRaw || !currentFile) return;

  // Fast path — both variants already inside the player. Gain-swap only.
  if (player && player.hasVariant(variant)) {
    player.setActiveVariant(variant);
    updateMetaAndSave(variant, result);
    return;
  }

  // Slow path — variant just became available. Rebuild with all cached
  // tracks, restoring playback position + state so the listener doesn't
  // notice (a brief gap during the rebuild itself is unavoidable).
  const lastPos = player ? player.getPosition() : 0;
  const wasPlaying = player ? player.isPlaying : false;
  if (player) {
    player.dispose();
    player = null;
  }

  const enhancedTracks = {};
  for (const [v, r] of Object.entries(currentResults)) {
    if (r?.audio) enhancedTracks[v] = r.audio;
  }

  player = createABPlayer({
    raw: currentRaw,
    enhancedTracks,
    activeVariant: variant,
    sampleRate: SR,
  });

  $('tryPlayer').classList.remove('is-empty');
  $('tryPlayerFilename').textContent = currentFile.name;
  $('tryMixSlider').disabled = false;
  $('tryPlayBtn').disabled = false;

  if (lastPos > 0) player.seekTo(lastPos);
  if (wasPlaying) {
    player.play().catch((err) => console.error('[try] could not resume playback:', err));
  }

  updateMetaAndSave(variant, result);
}

/** Update the result meta sentence + Save link + m4a button — runs both
 *  on fresh-variant rebuilds and on cached-variant gain swaps. */
function updateMetaAndSave(variant, result) {
  const wavBlob = encodeWav(result.audio, result.sampleRate);
  if (enhancedURL) URL.revokeObjectURL(enhancedURL);
  enhancedURL = URL.createObjectURL(wavBlob);
  const saveLink = $('tryPlayerSaveLink');
  saveLink.href = enhancedURL;
  saveLink.download = `${currentFile.stem}.clear.wav`;
  if (m4aURL) {
    URL.revokeObjectURL(m4aURL);
    m4aURL = null;
  }
  lastEnhanced = {
    samples: result.audio,
    sampleRate: result.sampleRate,
    stem: `${currentFile.stem}.clear`,
  };
  const m4aBtn = $('tryDownloadM4a');
  if (M4A_INPUT_EXTS.includes(currentFile.ext)) {
    m4aBtn.style.display = 'inline-block';
    m4aBtn.disabled = false;
    m4aBtn.textContent = 'Save as .m4a';
  } else {
    m4aBtn.style.display = 'none';
  }
  // `clear` is null after a failed model switch (ensureModel disposes the
  // old session before loading), so prefer the backend stored on the result.
  const backend = result.backend ?? clear?.backend;
  $('tryPlayerMetaText').textContent = buildMetaSentence(variant, result, backend);
}

/**
 * Result line in the curated-card transcript style — two-line sentence.
 * Variant is shown in the active toggle pill above, no need to repeat.
 *
 * @param {string} variant - Unused in the sentence; kept for callers.
 * @param {{durationSec: number, processingSec: number}} result
 * @param {string} [backend] - 'webgpu' prints "WebGPU"; anything else "WASM".
 * @returns {string}
 */
function buildMetaSentence(variant, result, backend) {
  const dur = result.durationSec < 60
    ? `${result.durationSec.toFixed(1)} seconds`
    : `${fmtTime(result.durationSec)} minutes`;
  const proc = `${result.processingSec.toFixed(1)}s`;
  const rt = result.processingSec > 0 ? result.durationSec / result.processingSec : 0;
  const back = backend === 'webgpu' ? 'WebGPU' : 'WASM';
  return `Enhanced ${dur} in ${proc}.\n${Math.round(rt)}× realtime on ${back}.`;
}

// ── Sample-aligned A/B player (raw ↔ enhanced via mix slider) ─────────
/**
 * Sample-aligned multi-track A/B player. Raw + every cached enhanced
 * variant ({ studio, natural, ... }) are scheduled in parallel and routed
 * through one GainNode each. Toggling between variants is just a gain
 * swap — playback continues without restart. The mix slider crossfades
 * raw against whichever enhanced track is currently active.
 *
 * Binds to the `#tryPlayBtn`, `#tryScrubber` and `#tryMixSlider` elements
 * and resets the slider to 100 on creation.
 *
 * @param {object} options
 * @param {Float32Array} options.raw - Original samples.
 * @param {Object<string, Float32Array>} options.enhancedTracks - Enhanced
 *   samples keyed by variant.
 * @param {string} options.activeVariant - Variant heard initially.
 * @param {number} options.sampleRate - Sample rate declared on the buffers.
 * @returns {{isPlaying: boolean, getPosition: Function, hasVariant: Function,
 *   setActiveVariant: Function, play: Function, seekTo: Function,
 *   pauseIfPlaying: Function, dispose: Function}}
 */
function createABPlayer({ raw, enhancedTracks, activeVariant, sampleRate }) {
  let ctx = null;
  let bufRaw = null;
  const bufsEnh = {};           // variant → AudioBuffer
  let srcRaw = null;
  const srcsEnh = {};           // variant → AudioBufferSourceNode
  let gainRaw = null;
  const gainsEnh = {};          // variant → GainNode
  let isPlaying = false;
  let playStartCtxTime = 0;
  let pausePos = 0;
  let mix = 1.0;
  let active = activeVariant;
  let rafId = null;
  let unlocked = false;         // iOS Safari: needs a silent buffer kick on first play
  let disposed = false;         // set by dispose(); blocks late async starts

  const longest = Math.max(
    raw.length,
    ...Object.values(enhancedTracks).map((s) => s.length),
  );
  const duration = longest / sampleRate;

  const playBtn = $('tryPlayBtn');
  const scrubber = $('tryScrubber');
  const progress = scrubber.querySelector('.progress');
  const cur = scrubber.parentElement.querySelector('.cur');
  const tot = scrubber.parentElement.querySelector('.tot');
  const slider = $('tryMixSlider');

  /** Reflect 'playing' / 'paused' on the play button (state attr + icon). */
  function showButtonState(state) {
    playBtn.dataset.state = state;
    playBtn.innerHTML = state === 'playing' ? ICON_PAUSE : ICON_PLAY;
  }

  /** Lazily create the AudioContext, buffers and gain nodes. */
  function ensureCtx() {
    if (ctx) return;
    const Ctor = window.AudioContext || window.webkitAudioContext;
    // iOS 14 and older Safari reject the sampleRate constructor arg —
    // fall back to a default-rate context. AudioBuffers we make below
    // still declare sampleRate=48 kHz; the source node auto-resamples
    // to whatever ctx.sampleRate actually is.
    try {
      ctx = new Ctor({ sampleRate });
    } catch {
      ctx = new Ctor();
    }
    bufRaw = ctx.createBuffer(1, raw.length, sampleRate);
    bufRaw.copyToChannel(raw, 0);
    gainRaw = ctx.createGain();
    gainRaw.connect(ctx.destination);
    for (const [v, samples] of Object.entries(enhancedTracks)) {
      bufsEnh[v] = ctx.createBuffer(1, samples.length, sampleRate);
      bufsEnh[v].copyToChannel(samples, 0);
      gainsEnh[v] = ctx.createGain();
      gainsEnh[v].connect(ctx.destination);
    }
    applyMix();
  }

  /** iOS Safari unlock — fires a 1-sample silent buffer on first play so
   *  subsequent AudioBufferSourceNode.start() calls actually produce sound.
   *  No-op on desktop. Must run inside the user-gesture call stack. */
  function unlockIOSAudio() {
    if (unlocked || !ctx) return;
    try {
      const src = ctx.createBufferSource();
      src.buffer = ctx.createBuffer(1, 1, ctx.sampleRate);
      src.connect(ctx.destination);
      src.start(0);
      unlocked = true;
    } catch {
      // Best-effort: leave `unlocked` false so the next play() retries.
    }
  }

  /** Push `mix` / `active` into the gain nodes: raw gets 1 - mix, the active
   *  enhanced track gets mix, every other enhanced track is muted. */
  function applyMix() {
    if (!ctx) return;
    gainRaw.gain.value = 1 - mix;
    for (const v of Object.keys(gainsEnh)) {
      gainsEnh[v].gain.value = (v === active) ? mix : 0;
    }
  }

  /** Create and start one source per track at `offset` seconds. */
  function startSources(offset) {
    const startAt = ctx.currentTime + START_LEAD;
    srcRaw = ctx.createBufferSource();
    srcRaw.buffer = bufRaw;
    srcRaw.connect(gainRaw);
    srcRaw.start(startAt, offset);

    // Track which source carries the longest buffer: `duration` is derived
    // from the longest track, so that source defines the end of playback.
    let lastToEnd = srcRaw;
    let lastLength = bufRaw.length;
    for (const v of Object.keys(bufsEnh)) {
      const s = ctx.createBufferSource();
      s.buffer = bufsEnh[v];
      s.connect(gainsEnh[v]);
      s.start(startAt, offset);
      srcsEnh[v] = s;
      if (bufsEnh[v].length > lastLength) {
        lastToEnd = s;
        lastLength = bufsEnh[v].length;
      }
    }

    lastToEnd.onended = () => {
      if (!isPlaying) return;
      // Stop and disconnect every source so nothing keeps sounding (or
      // stays wired to the gains) after the UI flips back to "paused".
      stopSources();
      pausePos = 0;
      showButtonState('paused');
      syncTimes();
    };

    playStartCtxTime = startAt;
    isPlaying = true;
    pausePos = offset;
    startRaf();
  }

  /** Stop and disconnect all sources, then halt the progress animation. */
  function stopSources() {
    for (const s of [srcRaw, ...Object.values(srcsEnh)]) {
      if (!s) continue;
      try {
        // Detach first so a manual stop does not look like a natural end.
        s.onended = null;
        s.stop();
      } catch {
        // stop() throws when the source was never started; nothing to undo.
      }
      try {
        s.disconnect();
      } catch {
        // Already disconnected.
      }
    }
    srcRaw = null;
    for (const v of Object.keys(srcsEnh)) delete srcsEnh[v];
    isPlaying = false;
    stopRaf();
  }

  /** Playback position in seconds, clamped to [0, duration] while playing. */
  function currentPos() {
    if (isPlaying) {
      const elapsed = pausePos + (ctx.currentTime - playStartCtxTime);
      return Math.min(duration, Math.max(0, elapsed));
    }
    return pausePos;
  }

  /** Paint the current/total time labels and the progress bar width. */
  function syncTimes() {
    const p = currentPos();
    cur.textContent = fmtTime(p);
    tot.textContent = fmtTime(duration);
    // Zero-length audio would otherwise produce "NaN%".
    const pct = duration > 0 ? Math.min(100, (p / duration) * 100) : 0;
    progress.style.width = `${pct}%`;
  }

  function startRaf() {
    if (rafId) return;
    const tick = () => {
      syncTimes();
      if (isPlaying) rafId = requestAnimationFrame(tick);
    };
    rafId = requestAnimationFrame(tick);
  }

  function stopRaf() {
    if (rafId) {
      cancelAnimationFrame(rafId);
      rafId = null;
    }
    syncTimes();
  }

  /** Start playback from `pausePos`; no-op when already playing/disposed. */
  async function play() {
    if (disposed) return;
    ensureCtx();
    // Fire the iOS unlock SYNCHRONOUSLY inside the user gesture stack —
    // before any await — so Safari registers it as a user-initiated
    // playback. Calling ctx.resume() and starting the silent source
    // both have to happen within the same gesture-induced call.
    unlockIOSAudio();
    if (ctx.state !== 'running') {
      const resumed = ctx.resume();
      // Some Safaris return undefined here; guard before awaiting.
      if (resumed && typeof resumed.then === 'function') await resumed;
    }
    // While resume() was pending, a second click may already have started
    // playback, or the player may have been disposed. Starting again would
    // layer a duplicate set of sources that nothing ever stops.
    if (disposed || isPlaying) return;
    startSources(pausePos);
  }

  function pause() {
    const pos = currentPos();
    stopSources();
    pausePos = Math.min(duration, pos);
  }

  /** Move the playhead to `t` seconds, restarting sources if playing. */
  function seekTo(t) {
    const wasPlaying = isPlaying;
    if (wasPlaying) stopSources();
    pausePos = Math.max(0, Math.min(duration - 0.01, t));
    if (wasPlaying) startSources(pausePos);
    else syncTimes();
  }

  function paintSlider() {
    slider.style.setProperty('--p', `${slider.value}%`);
  }

  const onPlayClick = async () => {
    if (isPlaying) {
      pause();
      showButtonState('paused');
      return;
    }
    await play();
    // Only flip the icon when playback really started; the button element
    // is shared with whichever player replaces this one.
    if (isPlaying) showButtonState('playing');
  };

  const onScrubClick = (e) => {
    const rect = scrubber.getBoundingClientRect();
    // A zero-width scrubber would turn the ratio into NaN/Infinity.
    if (!(rect.width > 0)) return;
    const pct = (e.clientX - rect.left) / rect.width;
    seekTo(pct * duration);
  };

  const onSlider = () => {
    mix = slider.value / 100;
    applyMix();
    paintSlider();
  };

  // The `.mix-end` buttons jump the slider to the value in their `data-mix`.
  const mixEnds = document.querySelectorAll('#tryPlayer .mix-end');
  const mixEndHandlers = [];
  mixEnds.forEach((btn) => {
    const h = () => {
      slider.value = Number(btn.dataset.mix);
      mix = slider.value / 100;
      applyMix();
      paintSlider();
    };
    btn.addEventListener('click', h);
    mixEndHandlers.push([btn, h]);
  });

  showButtonState('paused');
  slider.value = 100;
  mix = 1.0;
  paintSlider();
  syncTimes();

  playBtn.addEventListener('click', onPlayClick);
  scrubber.addEventListener('click', onScrubClick);
  slider.addEventListener('input', onSlider);

  return {
    get isPlaying() { return isPlaying; },
    getPosition: currentPos,
    hasVariant(v) { return v in enhancedTracks; },
    /** Swap which enhanced track is heard — seamless gain change, no restart. */
    setActiveVariant(v) {
      if (!(v in enhancedTracks)) return false;
      active = v;
      applyMix();
      return true;
    },
    async play() {
      if (isPlaying) return;
      await play();
      if (isPlaying) showButtonState('playing');
    },
    seekTo,
    pauseIfPlaying() {
      if (isPlaying) {
        pause();
        showButtonState('paused');
      }
    },
    dispose() {
      disposed = true;
      stopSources();
      playBtn.removeEventListener('click', onPlayClick);
      scrubber.removeEventListener('click', onScrubClick);
      slider.removeEventListener('input', onSlider);
      for (const [btn, h] of mixEndHandlers) btn.removeEventListener('click', h);
      if (ctx) {
        // close() returns a promise in current browsers; keep a rejection
        // from surfacing as an unhandled rejection during teardown.
        const closed = ctx.close();
        if (closed && typeof closed.catch === 'function') {
          closed.catch((err) => console.warn('[try] AudioContext close failed:', err));
        }
      }
    },
  };
}