import numpy as np

# Alias for a one-dimensional NumPy array with dtype uint8.
# NOTE(review): the name suggests this holds a bit-packed binary embedding
# (e.g. the output of np.packbits) -- confirm against the call sites; nothing
# in the visible code is annotated with this alias yet.
type BinaryPackedEmbedding = np.ndarray[tuple[int], np.dtype[np.uint8]]


def binary_quantize(embedding: np.ndarray) -> np.ndarray:
    """Binarize an embedding by thresholding each component at zero.

    Args:
        embedding: Array of embedding values.

    Returns:
        The element-wise result of ``embedding > 0``: true where a
        component is strictly positive, false otherwise (zero maps to
        false). The result is returned unpacked, i.e. it is not passed
        through ``np.packbits``.
    """
    # TODO: According to the Sentence Transformers docs [1], the model can
    # also perform the quantization itself while encoding. Benchmark that
    # against quantizing here to see which is faster.
    # [1]: https://www.sbert.net/examples/sentence_transformer/applications/embedding-quantization/README.html#binary-quantization-in-sentence-transformers
    #
    # NOTE: bit-packing the result with np.packbits is currently disabled.
    return embedding > 0