Update moondream.py
fix: update KV cache to support batch and single-prompt inference.
- moondream.py +47 -12
moondream.py
CHANGED
|
@@ -77,22 +77,57 @@ class KVCache(nn.Module):
|
|
| 77 |
"v_cache", torch.zeros(*cache_shape, device=device, dtype=dtype)
|
| 78 |
)
|
| 79 |
|
|
|
|
| 80 |
def update(self, pos_ids, k, v):
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
kout[:, :, pos_ids, :] = k
|
| 86 |
vout[:, :, pos_ids, :] = v
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
for i in range(B):
|
| 92 |
-
pi = int(pos_ids[i])
|
| 93 |
-
kout[i, :, pi, :] = k[i]
|
| 94 |
-
vout[i, :, pi, :] = v[i]
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
|
| 98 |
|
|
|
|
| 77 |
"v_cache", torch.zeros(*cache_shape, device=device, dtype=dtype)
|
| 78 |
)
|
| 79 |
|
| 80 |
+
# in class KVCache
|
| 81 |
def update(self, pos_ids, k, v):
|
| 82 |
+
"""
|
| 83 |
+
Supports both:
|
| 84 |
+
- PREFILL: pos_ids shape == (q_len,), k/v shape == (B, H, q_len, D)
|
| 85 |
+
- STEP-DECODE (batched): pos_ids shape == (B,), k/v shape == (B, H, 1, D)
|
| 86 |
+
- STEP-DECODE (single): scalar pos_ids, k/v shape == (1, H, 1, D)
|
| 87 |
+
"""
|
| 88 |
+
kout, vout = self.k_cache, self.v_cache # (Bcache, H, T, D)
|
| 89 |
+
B, H, Q, D = k.shape
|
| 90 |
+
|
| 91 |
+
# Case A: PREFILL — a vector of all time indices
|
| 92 |
+
if torch.is_tensor(pos_ids) and pos_ids.ndim == 1 and pos_ids.numel() == Q and Q > 1:
|
| 93 |
+
# broadcast batch dimension into cache if needed
|
| 94 |
+
if kout.size(0) != B:
|
| 95 |
+
# grow/shrink the first dim to match B (this happens after you cloned
|
| 96 |
+
# image caches to B rows for batched prefill)
|
| 97 |
+
new_k = kout.new_zeros((B,) + tuple(kout.shape[1:]))
|
| 98 |
+
new_v = vout.new_zeros((B,) + tuple(vout.shape[1:]))
|
| 99 |
+
# copy row 0 as base (image prefix) into all rows
|
| 100 |
+
new_k[:] = kout[0]
|
| 101 |
+
new_v[:] = vout[0]
|
| 102 |
+
self.k_cache = kout = new_k
|
| 103 |
+
self.v_cache = vout = new_v
|
| 104 |
+
|
| 105 |
+
# write the whole segment for all rows at once
|
| 106 |
kout[:, :, pos_ids, :] = k
|
| 107 |
vout[:, :, pos_ids, :] = v
|
| 108 |
+
return kout, vout
|
| 109 |
+
|
| 110 |
+
# Case B: STEP-DECODE (batched) — one position per row, q_len == 1
|
| 111 |
+
if torch.is_tensor(pos_ids) and pos_ids.ndim == 1 and pos_ids.numel() == B and Q == 1:
|
| 112 |
for i in range(B):
|
| 113 |
+
pi = int(pos_ids[i])
|
| 114 |
+
kout[i, :, pi, :] = k[i, :, 0, :]
|
| 115 |
+
vout[i, :, pi, :] = v[i, :, 0, :]
|
| 116 |
+
return kout, vout
|
| 117 |
+
|
| 118 |
+
# Case C: STEP-DECODE (single) — scalar pos, B==1, q_len==1
|
| 119 |
+
if (not torch.is_tensor(pos_ids)) or pos_ids.ndim == 0:
|
| 120 |
+
pi = int(pos_ids)
|
| 121 |
+
kout[:B, :, pi, :] = k[:, :, 0, :]
|
| 122 |
+
vout[:B, :, pi, :] = v[:, :, 0, :]
|
| 123 |
+
return kout, vout
|
| 124 |
+
|
| 125 |
+
# Fallback: shape combo we didn't expect
|
| 126 |
+
raise RuntimeError(
|
| 127 |
+
f"KVCache.update: unsupported shapes pos_ids={tuple(pos_ids.shape) if torch.is_tensor(pos_ids) else '()'}, "
|
| 128 |
+
f"k={tuple(k.shape)}, v={tuple(v.shape)}"
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
|
| 132 |
|
| 133 |
|