pveugen committed on
Commit
70ba91a
·
verified ·
1 Parent(s): 16ffbde

Clear demo: ten Detail recordings, before/after

Browse files
Files changed (2) hide show
  1. index.html +0 -7
  2. try-browser.js +153 -131
index.html CHANGED
@@ -272,13 +272,6 @@ Works best with files shorter than 10 minutes.</span>
272
  <span class="label">Try with your audio</span>
273
  </a>
274
 
275
- <!-- Two hidden <audio> elements drive the raw + enhanced playback in
276
- parallel; their `.volume` properties carry the mix slider (raw vs
277
- active enhanced). Using <audio> instead of Web Audio routes through
278
- iOS's media playback channel, which ignores the ringer/silent switch. -->
279
- <audio id="audioRaw" preload="auto" playsinline aria-hidden="true"></audio>
280
- <audio id="audioEnh" preload="auto" playsinline aria-hidden="true"></audio>
281
-
282
  <script src="main.js"></script>
283
  <script src="try-browser.js" type="module"></script>
284
  </body>
 
272
  <span class="label">Try with your audio</span>
273
  </a>
274
 
 
 
 
 
 
 
 
275
  <script src="main.js"></script>
276
  <script src="try-browser.js" type="module"></script>
277
  </body>
try-browser.js CHANGED
@@ -13,7 +13,6 @@
13
  import { Clear, SR, encodeWav, decodeToMono } from './lib-clear/clear.js';
14
  import { encodeM4A } from './lib-clear/encode-m4a.js';
15
 
16
-
17
  const M4A_INPUT_EXTS = ['m4a', 'mp4', 'mov', 'aac', 'm4b'];
18
 
19
  const HF = 'https://huggingface.co/detail-co/clear/resolve/main';
@@ -436,21 +435,36 @@ function buildMetaSentence(variant, result, backend) {
436
  return `Enhanced ${dur} in ${proc}.\n${Math.round(rt)}× realtime on ${back}.`;
437
  }
438
 
439
- // ── A/B player via two <audio> elements ───────────────────────────────
440
- // iOS Safari refuses to play Web Audio through the silent switch, even
441
- // with the parallel-silencer hack. Routing through native <audio>
442
- // elements puts the page on the media playback channel (which ignores
443
- // the silent switch), but we lose:
444
- // - Seamless variant toggle (now there's a brief gap while audio.src
445
- // swaps to the other variant's WAV blob).
446
- // - Sample-accurate parallel scheduling. Two <audio> elements drift
447
- // by a few ms across long clips; we resync currentTime on play.
448
- // What we keep: the live mix slider — raw and enhanced play in parallel
449
- // with `audio.volume` blending between them.
450
 
 
 
 
 
 
 
 
451
  function createABPlayer({ raw, enhancedTracks, activeVariant, sampleRate }) {
452
- const audioRaw = document.getElementById('audioRaw');
453
- const audioEnh = document.getElementById('audioEnh');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
  const playBtn = $('tryPlayBtn');
455
  const scrubber = $('tryScrubber');
456
  const progress = scrubber.querySelector('.progress');
@@ -458,129 +472,139 @@ function createABPlayer({ raw, enhancedTracks, activeVariant, sampleRate }) {
458
  const tot = scrubber.parentElement.querySelector('.tot');
459
  const slider = $('tryMixSlider');
460
 
461
- // Encode each track once as a WAV blob URL — <audio>.src takes a URL.
462
- const rawURL = URL.createObjectURL(encodeWav(raw, sampleRate));
463
- const enhURLs = {};
464
- for (const [v, s] of Object.entries(enhancedTracks)) {
465
- enhURLs[v] = URL.createObjectURL(encodeWav(s, sampleRate));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  }
467
-
468
- let mix = 1.0;
469
- let active = activeVariant;
470
- let rafId = null;
471
- let variantSwapping = false;
472
-
473
- audioRaw.loop = false;
474
- audioEnh.loop = false;
475
- audioRaw.src = rawURL;
476
- audioEnh.src = enhURLs[active];
477
-
478
  function applyMix() {
479
- audioRaw.volume = Math.max(0, Math.min(1, 1 - mix));
480
- audioEnh.volume = Math.max(0, Math.min(1, mix));
 
 
 
481
  }
482
- applyMix();
483
-
484
- function paintSlider() {
485
- slider.style.setProperty('--p', `${slider.value}%`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
  }
487
-
488
- // Use the enhanced track as the timing master. They're the same length
489
- // (mono PCM, identical sample count), so any drift is just clock jitter
490
- // between two parallel HTML media elements. Resync on play/seek.
491
- const getDuration = () => audioEnh.duration || (raw.length / sampleRate);
492
- const getPosition = () => audioEnh.currentTime || 0;
493
-
494
  function syncTimes() {
495
- const p = getPosition();
496
- const d = getDuration();
497
  cur.textContent = fmtTime(p);
498
- tot.textContent = fmtTime(d);
499
- progress.style.width = `${d > 0 ? Math.min(100, (p / d) * 100) : 0}%`;
500
  }
501
  function startRaf() {
502
  if (rafId) return;
503
- const tick = () => {
504
- syncTimes();
505
- if (!audioEnh.paused) rafId = requestAnimationFrame(tick);
506
- else rafId = null;
507
- };
508
  rafId = requestAnimationFrame(tick);
509
  }
510
  function stopRaf() {
511
  if (rafId) { cancelAnimationFrame(rafId); rafId = null; }
512
  syncTimes();
513
  }
514
-
515
  async function play() {
516
- // Resync raw to enhanced before playing so they start aligned.
517
- audioRaw.currentTime = audioEnh.currentTime;
518
- // Both `.play()`s fire from this same user gesture (the play button
519
- // click that called us) so iOS counts them as user-initiated.
520
- await Promise.all([
521
- audioEnh.play().catch(() => {}),
522
- audioRaw.play().catch(() => {}),
523
- ]);
524
- startRaf();
 
 
 
525
  }
526
  function pause() {
527
- audioRaw.pause();
528
- audioEnh.pause();
529
- stopRaf();
530
  }
531
  function seekTo(t) {
532
- const d = getDuration();
533
- const pos = Math.max(0, Math.min(d - 0.01, t));
534
- audioRaw.currentTime = pos;
535
- audioEnh.currentTime = pos;
536
- syncTimes();
537
  }
538
- audioEnh.addEventListener('ended', () => {
539
- audioRaw.pause();
540
- playBtn.dataset.state = 'paused';
541
- playBtn.innerHTML = ICON_PLAY;
542
- stopRaf();
543
- });
544
-
545
- // ── Variant swap (Studio ↔ Natural) ─────────────────────────────────
546
- // <audio>.src swap drops position + state. Capture both, restore after
547
- // the new buffer is ready.
548
- async function setActiveVariant(v) {
549
- if (!(v in enhURLs)) return false;
550
- if (variantSwapping) return false;
551
- if (v === active) return true;
552
- variantSwapping = true;
553
- const wasPlaying = !audioEnh.paused;
554
- const pos = audioEnh.currentTime || 0;
555
- audioEnh.pause();
556
- audioEnh.src = enhURLs[v];
557
- active = v;
558
- try {
559
- await new Promise((resolve) => {
560
- const ready = () => { audioEnh.removeEventListener('loadedmetadata', ready); resolve(); };
561
- if (audioEnh.readyState >= 1) ready();
562
- else audioEnh.addEventListener('loadedmetadata', ready);
563
- });
564
- audioEnh.currentTime = pos;
565
- audioRaw.currentTime = pos;
566
- if (wasPlaying) {
567
- await Promise.all([
568
- audioEnh.play().catch(() => {}),
569
- audioRaw.play().catch(() => {}),
570
- ]);
571
- startRaf();
572
- } else {
573
- syncTimes();
574
- }
575
- } finally {
576
- variantSwapping = false;
577
- }
578
- return true;
579
  }
580
 
581
- // ── Event wiring ─────────────────────────────────────────────────────
582
  const onPlayClick = async () => {
583
- if (audioEnh.paused) {
584
  await play();
585
  playBtn.dataset.state = 'playing';
586
  playBtn.innerHTML = ICON_PAUSE;
@@ -593,7 +617,7 @@ function createABPlayer({ raw, enhancedTracks, activeVariant, sampleRate }) {
593
  const onScrubClick = (e) => {
594
  const rect = scrubber.getBoundingClientRect();
595
  const pct = (e.clientX - rect.left) / rect.width;
596
- seekTo(pct * getDuration());
597
  };
598
  const onSlider = () => { mix = slider.value / 100; applyMix(); paintSlider(); };
599
  const mixEnds = document.querySelectorAll('#tryPlayer .mix-end');
@@ -611,46 +635,44 @@ function createABPlayer({ raw, enhancedTracks, activeVariant, sampleRate }) {
611
  playBtn.dataset.state = 'paused';
612
  playBtn.innerHTML = ICON_PLAY;
613
  slider.value = 100; mix = 1.0;
614
- paintSlider();
615
 
616
  playBtn.addEventListener('click', onPlayClick);
617
  scrubber.addEventListener('click', onScrubClick);
618
  slider.addEventListener('input', onSlider);
619
 
620
- // Once metadata lands on either element, the duration becomes known.
621
- audioEnh.addEventListener('loadedmetadata', syncTimes);
622
- syncTimes();
623
-
624
  return {
625
- get isPlaying() { return !audioEnh.paused; },
626
- getPosition,
627
- hasVariant: (v) => v in enhURLs,
628
- setActiveVariant,
 
 
 
 
 
 
629
  async play() {
630
- if (!audioEnh.paused) return;
631
  await play();
632
  playBtn.dataset.state = 'playing';
633
  playBtn.innerHTML = ICON_PAUSE;
634
  },
635
  seekTo,
636
  pauseIfPlaying() {
637
- if (!audioEnh.paused) {
638
  pause();
639
  playBtn.dataset.state = 'paused';
640
  playBtn.innerHTML = ICON_PLAY;
641
  }
642
  },
643
  dispose() {
644
- pause();
645
- audioEnh.removeEventListener('loadedmetadata', syncTimes);
646
  playBtn.removeEventListener('click', onPlayClick);
647
  scrubber.removeEventListener('click', onScrubClick);
648
  slider.removeEventListener('input', onSlider);
649
  for (const [btn, h] of mixEndHandlers) btn.removeEventListener('click', h);
650
- audioRaw.removeAttribute('src'); audioRaw.load();
651
- audioEnh.removeAttribute('src'); audioEnh.load();
652
- URL.revokeObjectURL(rawURL);
653
- for (const u of Object.values(enhURLs)) URL.revokeObjectURL(u);
654
  },
655
  };
656
  }
 
13
  import { Clear, SR, encodeWav, decodeToMono } from './lib-clear/clear.js';
14
  import { encodeM4A } from './lib-clear/encode-m4a.js';
15
 
 
16
  const M4A_INPUT_EXTS = ['m4a', 'mp4', 'mov', 'aac', 'm4b'];
17
 
18
  const HF = 'https://huggingface.co/detail-co/clear/resolve/main';
 
435
  return `Enhanced ${dur} in ${proc}.\n${Math.round(rt)}× realtime on ${back}.`;
436
  }
437
 
438
+ // ── Sample-aligned A/B player (raw ↔ enhanced via mix slider) ─────────
 
 
 
 
 
 
 
 
 
 
439
 
440
+ /**
441
+ * Sample-aligned multi-track A/B player. Raw + every cached enhanced
442
+ * variant ({ studio, natural, ... }) are scheduled in parallel and routed
443
+ * through one GainNode each. Toggling between variants is just a gain
444
+ * swap — playback continues without restart. The mix slider crossfades
445
+ * raw against whichever enhanced track is currently active.
446
+ */
447
  function createABPlayer({ raw, enhancedTracks, activeVariant, sampleRate }) {
448
+ let ctx = null;
449
+ let bufRaw = null;
450
+ const bufsEnh = {}; // variant → AudioBuffer
451
+ let srcRaw = null;
452
+ const srcsEnh = {}; // variant → AudioBufferSourceNode
453
+ let gainRaw = null;
454
+ const gainsEnh = {}; // variant → GainNode
455
+ let isPlaying = false;
456
+ let playStartCtxTime = 0;
457
+ let pausePos = 0;
458
+ let mix = 1.0;
459
+ let active = activeVariant;
460
+ let rafId = null;
461
+ let unlocked = false; // iOS Safari: needs a silent buffer kick on first play
462
+ const longest = Math.max(
463
+ raw.length,
464
+ ...Object.values(enhancedTracks).map((s) => s.length),
465
+ );
466
+ const duration = longest / sampleRate;
467
+
468
  const playBtn = $('tryPlayBtn');
469
  const scrubber = $('tryScrubber');
470
  const progress = scrubber.querySelector('.progress');
 
472
  const tot = scrubber.parentElement.querySelector('.tot');
473
  const slider = $('tryMixSlider');
474
 
475
+ function ensureCtx() {
476
+ if (ctx) return;
477
+ const Ctor = window.AudioContext || window.webkitAudioContext;
478
+ // iOS 14 and older Safari reject the sampleRate constructor arg —
479
+ // fall back to a default-rate context. AudioBuffers we make below
480
+ // still declare sampleRate=48 kHz; the source node auto-resamples
481
+ // to whatever ctx.sampleRate actually is.
482
+ try { ctx = new Ctor({ sampleRate }); }
483
+ catch { ctx = new Ctor(); }
484
+ bufRaw = ctx.createBuffer(1, raw.length, sampleRate);
485
+ bufRaw.copyToChannel(raw, 0);
486
+ gainRaw = ctx.createGain();
487
+ gainRaw.connect(ctx.destination);
488
+ for (const [v, samples] of Object.entries(enhancedTracks)) {
489
+ bufsEnh[v] = ctx.createBuffer(1, samples.length, sampleRate);
490
+ bufsEnh[v].copyToChannel(samples, 0);
491
+ gainsEnh[v] = ctx.createGain();
492
+ gainsEnh[v].connect(ctx.destination);
493
+ }
494
+ applyMix();
495
+ }
496
+ /** iOS Safari unlock — fires a 1-sample silent buffer on first play so
497
+ * subsequent AudioBufferSourceNode.start() calls actually produce sound.
498
+ * No-op on desktop. Must run inside the user-gesture call stack. */
499
+ function unlockIOSAudio() {
500
+ if (unlocked || !ctx) return;
501
+ try {
502
+ const src = ctx.createBufferSource();
503
+ src.buffer = ctx.createBuffer(1, 1, ctx.sampleRate);
504
+ src.connect(ctx.destination);
505
+ src.start(0);
506
+ unlocked = true;
507
+ } catch {}
508
  }
 
 
 
 
 
 
 
 
 
 
 
509
  function applyMix() {
510
+ if (!ctx) return;
511
+ gainRaw.gain.value = 1 - mix;
512
+ for (const v of Object.keys(gainsEnh)) {
513
+ gainsEnh[v].gain.value = (v === active) ? mix : 0;
514
+ }
515
  }
516
+ function startSources(offset) {
517
+ const startAt = ctx.currentTime + START_LEAD;
518
+ srcRaw = ctx.createBufferSource();
519
+ srcRaw.buffer = bufRaw;
520
+ srcRaw.connect(gainRaw);
521
+ srcRaw.start(startAt, offset);
522
+ for (const v of Object.keys(bufsEnh)) {
523
+ const s = ctx.createBufferSource();
524
+ s.buffer = bufsEnh[v];
525
+ s.connect(gainsEnh[v]);
526
+ s.start(startAt, offset);
527
+ srcsEnh[v] = s;
528
+ }
529
+ srcRaw.onended = () => {
530
+ if (isPlaying) {
531
+ isPlaying = false;
532
+ pausePos = 0;
533
+ playBtn.dataset.state = 'paused';
534
+ playBtn.innerHTML = ICON_PLAY;
535
+ stopRaf();
536
+ }
537
+ };
538
+ playStartCtxTime = startAt;
539
+ isPlaying = true;
540
+ pausePos = offset;
541
+ startRaf();
542
+ }
543
+ function stopSources() {
544
+ for (const s of [srcRaw, ...Object.values(srcsEnh)]) {
545
+ if (!s) continue;
546
+ try { s.onended = null; s.stop(); } catch {}
547
+ try { s.disconnect(); } catch {}
548
+ }
549
+ srcRaw = null;
550
+ for (const v of Object.keys(srcsEnh)) delete srcsEnh[v];
551
+ isPlaying = false;
552
+ stopRaf();
553
+ }
554
+ function currentPos() {
555
+ if (isPlaying) {
556
+ const elapsed = pausePos + (ctx.currentTime - playStartCtxTime);
557
+ return Math.min(duration, Math.max(0, elapsed));
558
+ }
559
+ return pausePos;
560
  }
 
 
 
 
 
 
 
561
  function syncTimes() {
562
+ const p = currentPos();
 
563
  cur.textContent = fmtTime(p);
564
+ tot.textContent = fmtTime(duration);
565
+ progress.style.width = `${Math.min(100, (p / duration) * 100)}%`;
566
  }
567
  function startRaf() {
568
  if (rafId) return;
569
+ const tick = () => { syncTimes(); if (isPlaying) rafId = requestAnimationFrame(tick); };
 
 
 
 
570
  rafId = requestAnimationFrame(tick);
571
  }
572
  function stopRaf() {
573
  if (rafId) { cancelAnimationFrame(rafId); rafId = null; }
574
  syncTimes();
575
  }
 
576
  async function play() {
577
+ ensureCtx();
578
+ // Fire the iOS unlock SYNCHRONOUSLY inside the user gesture stack —
579
+ // before any await so Safari registers it as a user-initiated
580
+ // playback. Calling ctx.resume() and starting the silent source
581
+ // both have to happen within the same gesture-induced call.
582
+ unlockIOSAudio();
583
+ if (ctx.state !== 'running') {
584
+ const resumed = ctx.resume();
585
+ // Some Safaris return undefined here; guard before awaiting.
586
+ if (resumed && typeof resumed.then === 'function') await resumed;
587
+ }
588
+ startSources(pausePos);
589
  }
590
  function pause() {
591
+ const pos = currentPos();
592
+ stopSources();
593
+ pausePos = Math.min(duration, pos);
594
  }
595
  function seekTo(t) {
596
+ const wasPlaying = isPlaying;
597
+ if (wasPlaying) stopSources();
598
+ pausePos = Math.max(0, Math.min(duration - 0.01, t));
599
+ if (wasPlaying) startSources(pausePos);
600
+ else syncTimes();
601
  }
602
+ function paintSlider() {
603
+ slider.style.setProperty('--p', `${slider.value}%`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  }
605
 
 
606
  const onPlayClick = async () => {
607
+ if (!isPlaying) {
608
  await play();
609
  playBtn.dataset.state = 'playing';
610
  playBtn.innerHTML = ICON_PAUSE;
 
617
  const onScrubClick = (e) => {
618
  const rect = scrubber.getBoundingClientRect();
619
  const pct = (e.clientX - rect.left) / rect.width;
620
+ seekTo(pct * duration);
621
  };
622
  const onSlider = () => { mix = slider.value / 100; applyMix(); paintSlider(); };
623
  const mixEnds = document.querySelectorAll('#tryPlayer .mix-end');
 
635
  playBtn.dataset.state = 'paused';
636
  playBtn.innerHTML = ICON_PLAY;
637
  slider.value = 100; mix = 1.0;
638
+ paintSlider(); syncTimes();
639
 
640
  playBtn.addEventListener('click', onPlayClick);
641
  scrubber.addEventListener('click', onScrubClick);
642
  slider.addEventListener('input', onSlider);
643
 
 
 
 
 
644
  return {
645
+ get isPlaying() { return isPlaying; },
646
+ getPosition: currentPos,
647
+ hasVariant(v) { return v in enhancedTracks; },
648
+ /** Swap which enhanced track is heard — seamless gain change, no restart. */
649
+ setActiveVariant(v) {
650
+ if (!(v in enhancedTracks)) return false;
651
+ active = v;
652
+ applyMix();
653
+ return true;
654
+ },
655
  async play() {
656
+ if (isPlaying) return;
657
  await play();
658
  playBtn.dataset.state = 'playing';
659
  playBtn.innerHTML = ICON_PAUSE;
660
  },
661
  seekTo,
662
  pauseIfPlaying() {
663
+ if (isPlaying) {
664
  pause();
665
  playBtn.dataset.state = 'paused';
666
  playBtn.innerHTML = ICON_PLAY;
667
  }
668
  },
669
  dispose() {
670
+ stopSources();
 
671
  playBtn.removeEventListener('click', onPlayClick);
672
  scrubber.removeEventListener('click', onScrubClick);
673
  slider.removeEventListener('input', onSlider);
674
  for (const [btn, h] of mixEndHandlers) btn.removeEventListener('click', h);
675
+ if (ctx) ctx.close();
 
 
 
676
  },
677
  };
678
  }