Spaces:
Sleeping
Sleeping
Commit
·
86dd29c
1
Parent(s):
82c8e97
Add loudness-matching debug logs as a sanity check
Browse files- jam_worker.py +74 -10
jam_worker.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
# jam_worker.py - Bar-locked spool rewrite
|
| 2 |
from __future__ import annotations
|
| 3 |
|
|
|
|
|
|
|
| 4 |
import threading, time
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from fractions import Fraction
|
|
@@ -435,7 +437,7 @@ class JamWorker(threading.Thread):
|
|
| 435 |
This keeps external timing and bar alignment identical, but removes the audible
|
| 436 |
fade-to-zero at chunk ends.
|
| 437 |
"""
|
| 438 |
-
|
| 439 |
|
| 440 |
# ---- unpack model-rate samples ----
|
| 441 |
s = wav.samples.astype(np.float32, copy=False)
|
|
@@ -550,20 +552,77 @@ class JamWorker(threading.Thread):
|
|
| 550 |
return self.idx <= (horizon_anchor + self._max_buffer_ahead)
|
| 551 |
|
| 552 |
def _emit_ready(self):
|
| 553 |
-
"""Emit next chunk(s) if the spool has enough samples."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
while True:
|
| 555 |
start, end = self._bar_clock.bounds_for_chunk(self.idx, self.params.bars_per_chunk)
|
| 556 |
if end > self._spool_written:
|
| 557 |
-
|
| 558 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
|
| 560 |
# Loudness match to reference loop (optional)
|
|
|
|
| 561 |
if self.params.ref_loop is not None and self.params.loudness_mode != "none":
|
| 562 |
ref = self.params.ref_loop.as_stereo().resample(self.params.target_sr)
|
| 563 |
wav = au.Waveform(loop.copy(), int(self.params.target_sr))
|
| 564 |
-
|
| 565 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 566 |
|
|
|
|
| 567 |
audio_b64, total_samples, channels = wav_bytes_base64(loop, int(self.params.target_sr))
|
| 568 |
meta = {
|
| 569 |
"bpm": float(self.params.bpm),
|
|
@@ -580,27 +639,31 @@ class JamWorker(threading.Thread):
|
|
| 580 |
}
|
| 581 |
chunk = JamChunk(index=self.idx, audio_base64=audio_b64, metadata=meta)
|
| 582 |
|
|
|
|
| 583 |
with self._cv:
|
| 584 |
self._outbox[self.idx] = chunk
|
| 585 |
self._cv.notify_all()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 586 |
self.idx += 1
|
| 587 |
|
| 588 |
# If a reseed is queued, install it *right after* we finish a chunk
|
| 589 |
with self._lock:
|
| 590 |
-
# Prefer seamless token splice when available
|
| 591 |
if self._pending_token_splice is not None:
|
| 592 |
spliced = self._coerce_tokens(self._pending_token_splice["tokens"])
|
| 593 |
try:
|
| 594 |
-
#
|
| 595 |
-
self.state.context_tokens = spliced
|
| 596 |
self._pending_token_splice = None
|
|
|
|
| 597 |
except Exception:
|
| 598 |
-
# fallback: full reseed using spliced tokens
|
| 599 |
new_state = self.mrt.init_state()
|
| 600 |
new_state.context_tokens = spliced
|
| 601 |
self.state = new_state
|
| 602 |
self._model_stream = None
|
| 603 |
self._pending_token_splice = None
|
|
|
|
| 604 |
elif self._pending_reseed is not None:
|
| 605 |
ctx = self._coerce_tokens(self._pending_reseed["ctx"])
|
| 606 |
new_state = self.mrt.init_state()
|
|
@@ -608,6 +671,7 @@ class JamWorker(threading.Thread):
|
|
| 608 |
self.state = new_state
|
| 609 |
self._model_stream = None
|
| 610 |
self._pending_reseed = None
|
|
|
|
| 611 |
|
| 612 |
# ---------- main loop ----------
|
| 613 |
|
|
|
|
| 1 |
# jam_worker.py - Bar-locked spool rewrite
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
import threading, time
|
| 7 |
from dataclasses import dataclass
|
| 8 |
from fractions import Fraction
|
|
|
|
| 437 |
This keeps external timing and bar alignment identical, but removes the audible
|
| 438 |
fade-to-zero at chunk ends.
|
| 439 |
"""
|
| 440 |
+
|
| 441 |
|
| 442 |
# ---- unpack model-rate samples ----
|
| 443 |
s = wav.samples.astype(np.float32, copy=False)
|
|
|
|
| 552 |
return self.idx <= (horizon_anchor + self._max_buffer_ahead)
|
| 553 |
|
| 554 |
def _emit_ready(self):
|
| 555 |
+
"""Emit next chunk(s) if the spool has enough samples. With verbose RMS debug."""
|
| 556 |
+
|
| 557 |
+
|
| 558 |
+
QDB_SILENCE = -55.0 # quarter-bar segment considered "near silence" if RMS dBFS below this
|
| 559 |
+
EPS = 1e-12
|
| 560 |
+
|
| 561 |
+
def rms_dbfs(x: np.ndarray) -> float:
    """Return the RMS level of *x* in dB (20 * log10 of the RMS).

    A 2-D input is first collapsed to one channel by averaging across
    axis 1, and the RMS is taken over that down-mixed signal (not per
    channel). ``EPS`` from the enclosing scope is added under the square
    root and used as a floor before the logarithm so the result stays
    finite for all-zero input.

    NOTE(review): the original comment assumes float32 samples in
    [-1, 1]; nothing here enforces that.
    """
    mono = x.mean(axis=1) if x.ndim == 2 else x
    level = float(np.sqrt(np.mean(np.square(mono)) + EPS))
    floored = max(level, EPS)
    return 20.0 * np.log10(floored)
|
| 567 |
+
|
| 568 |
+
def qbar_rms_dbfs(x: np.ndarray, seg_len: int) -> list[float]:
    """Return per-segment RMS levels (in dB) for consecutive windows of *x*.

    A 2-D input is down-mixed by averaging across axis 1; any other
    input is used as-is. The signal is then walked in steps of
    ``seg_len`` along axis 0, with the final window allowed to be
    shorter. Each window contributes ``20 * log10(rms)``, where ``EPS``
    from the enclosing scope is added under the square root and applied
    as a floor before the logarithm.
    """
    mono = x.mean(axis=1) if x.ndim == 2 else x
    total = mono.shape[0]
    levels = []
    for offset in range(0, total, seg_len):
        # Slicing past the end clips automatically, so the last window may be short.
        window = mono[offset:offset + seg_len]
        if window.size == 0:
            break
        level = float(np.sqrt(np.mean(window * window) + EPS))
        levels.append(20.0 * np.log10(max(level, EPS)))
    return levels
|
| 582 |
+
|
| 583 |
while True:
|
| 584 |
start, end = self._bar_clock.bounds_for_chunk(self.idx, self.params.bars_per_chunk)
|
| 585 |
if end > self._spool_written:
|
| 586 |
+
# Not enough audio buffered for the next full chunk
|
| 587 |
+
# Debug aid (disabled): uncomment the print below to log the readiness gap for this idx
|
| 588 |
+
# print(f"[emit idx={self.idx}] need end={end}, have={self._spool_written} (Δ={end - self._spool_written})")
|
| 589 |
+
break
|
| 590 |
+
|
| 591 |
+
# Slice the emitted window (target SR)
|
| 592 |
+
loop = self._spool[start:end] # shape: [samples, channels] @ target_sr
|
| 593 |
+
|
| 594 |
+
# ---- DEBUG: pre-loudness quarter-bar RMS ----
|
| 595 |
+
spb = self._bar_clock.bar_samps # samples per bar @ target_sr
|
| 596 |
+
qlen = max(1, spb // 4) # quarter-bar segment length
|
| 597 |
+
q_rms_pre = qbar_rms_dbfs(loop, qlen)
|
| 598 |
+
# Mark segments that look like near-silence
|
| 599 |
+
silent_marks_pre = ["🟢" if v > QDB_SILENCE else "🟥" for v in q_rms_pre[:8]]
|
| 600 |
+
print(f"[emit idx={self.idx}] pre-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_pre[:8]]} {''.join(silent_marks_pre)}")
|
| 601 |
|
| 602 |
# Loudness match to reference loop (optional)
|
| 603 |
+
gain_db_applied = None
|
| 604 |
if self.params.ref_loop is not None and self.params.loudness_mode != "none":
|
| 605 |
ref = self.params.ref_loop.as_stereo().resample(self.params.target_sr)
|
| 606 |
wav = au.Waveform(loop.copy(), int(self.params.target_sr))
|
| 607 |
+
try:
|
| 608 |
+
matched, gain_db_applied = match_loudness_to_reference(
|
| 609 |
+
ref, wav,
|
| 610 |
+
method=self.params.loudness_mode,
|
| 611 |
+
headroom_db=self.params.headroom_db
|
| 612 |
+
)
|
| 613 |
+
loop = matched.samples
|
| 614 |
+
except Exception as e:
|
| 615 |
+
print(f"[emit idx={self.idx}] loudness-match ERROR: {e}; proceeding with un-matched audio")
|
| 616 |
+
|
| 617 |
+
# ---- DEBUG: post-loudness quarter-bar RMS ----
|
| 618 |
+
q_rms_post = qbar_rms_dbfs(loop, qlen)
|
| 619 |
+
silent_marks_post = ["🟢" if v > QDB_SILENCE else "🟥" for v in q_rms_post[:8]]
|
| 620 |
+
if gain_db_applied is None:
|
| 621 |
+
print(f"[emit idx={self.idx}] post-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_post[:8]]} {''.join(silent_marks_post)} (LM: none)")
|
| 622 |
+
else:
|
| 623 |
+
print(f"[emit idx={self.idx}] post-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_post[:8]]} {''.join(silent_marks_post)} (LM gain {gain_db_applied:+.2f} dB)")
|
| 624 |
|
| 625 |
+
# Encode & ship
|
| 626 |
audio_b64, total_samples, channels = wav_bytes_base64(loop, int(self.params.target_sr))
|
| 627 |
meta = {
|
| 628 |
"bpm": float(self.params.bpm),
|
|
|
|
| 639 |
}
|
| 640 |
chunk = JamChunk(index=self.idx, audio_base64=audio_b64, metadata=meta)
|
| 641 |
|
| 642 |
+
# Emit to outbox
|
| 643 |
with self._cv:
|
| 644 |
self._outbox[self.idx] = chunk
|
| 645 |
self._cv.notify_all()
|
| 646 |
+
|
| 647 |
+
# ---- DEBUG: boundary bookkeeping ----
|
| 648 |
+
print(f"[emit idx={self.idx}] slice [{start}:{end}] (len={end-start}), spool_written={self._spool_written}")
|
| 649 |
+
|
| 650 |
self.idx += 1
|
| 651 |
|
| 652 |
# If a reseed is queued, install it *right after* we finish a chunk
|
| 653 |
with self._lock:
|
|
|
|
| 654 |
if self._pending_token_splice is not None:
|
| 655 |
spliced = self._coerce_tokens(self._pending_token_splice["tokens"])
|
| 656 |
try:
|
| 657 |
+
self.state.context_tokens = spliced # in-place update
|
|
|
|
| 658 |
self._pending_token_splice = None
|
| 659 |
+
print(f"[emit idx={self.idx}] installed token splice (in-place)")
|
| 660 |
except Exception:
|
|
|
|
| 661 |
new_state = self.mrt.init_state()
|
| 662 |
new_state.context_tokens = spliced
|
| 663 |
self.state = new_state
|
| 664 |
self._model_stream = None
|
| 665 |
self._pending_token_splice = None
|
| 666 |
+
print(f"[emit idx={self.idx}] installed token splice (reinit state)")
|
| 667 |
elif self._pending_reseed is not None:
|
| 668 |
ctx = self._coerce_tokens(self._pending_reseed["ctx"])
|
| 669 |
new_state = self.mrt.init_state()
|
|
|
|
| 671 |
self.state = new_state
|
| 672 |
self._model_stream = None
|
| 673 |
self._pending_reseed = None
|
| 674 |
+
print(f"[emit idx={self.idx}] performed full reseed")
|
| 675 |
|
| 676 |
# ---------- main loop ----------
|
| 677 |
|