import numpy as np type BinaryPackedEmbedding = np.ndarray[tuple[int], np.dtype[np.uint8]] def binary_quantize(embedding: np.ndarray) -> np.ndarray: # TODO: [1] mentions that quantization can also be done by the model # during encoding. Need to test whether that is faster. # [1]: https://www.sbert.net/examples/sentence_transformer/applications/embedding-quantization/README.html#binary-quantization-in-sentence-transformers binary_embedding = embedding > 0 return binary_embedding # return np.packbits(binary_embedding)