Highlights
Contextual AI's reranker is the first instruction-following reranker capable of handling retrieval conflicts and ranking with custom instructions (e.g., prioritizing recent information). It achieves state-of-the-art performance on BEIR and sits on the cost/performance Pareto frontier across:
- Instruction following
- Question answering
- Multilinguality (100+ languages)
- Product search & recommendation
- Real-world use cases
For detailed benchmarks, see our blog post.
Overview
- Model Type: Text Reranking
- Supported Languages: 100+
- Parameters: 1B
- Context Length: up to 32K tokens
When to Use This Model
Use this reranker when you need to:
- Re-rank retrieved documents with custom instructions
- Handle conflicting information in retrieval results
- Prioritize documents by recency or other criteria (see the instruction examples after this list)
- Support multilingual search (100+ languages)
- Process long contexts (up to 32K tokens)
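Instructions are free-form text appended after the query (see format_prompts below), so ranking behavior can be changed per request without reloading the model; an empty instruction simply omits it from the prompt. A minimal sketch, reusing the model_path, query, documents, and infer_w_vllm helper from the Quickstart below (the instruction strings are illustrative, not prescribed by the model):

# Illustrative instruction variants -- any free-form text, or an empty string, is accepted.
instructions = [
    "",                                               # plain relevance ranking
    "Prioritize recent medical research",             # recency
    "Prefer peer-reviewed sources over blog posts",   # source preference
]
for instruction in instructions:
    infer_w_vllm(model_path, query, instruction, documents)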
Quickstart
Basic Usage
The snippet below relies on the infer_w_vllm and infer_w_hf helpers defined in the vLLM and Transformers sections that follow; pick whichever backend you use.
model_path = "ContextualAI/ctxl-rerank-v2-instruct-multilingual-1b"
query = "What are the health benefits of exercise?"
instruction = "Prioritize recent medical research"
documents = [
    "Regular exercise reduces risk of heart disease and improves mental health.",
    "A 2024 study shows exercise enhances cognitive function in older adults.",
    "Ancient Greeks valued physical fitness for military training.",
]
infer_w_vllm(model_path, query, instruction, documents)
infer_w_hf(model_path, query, instruction, documents)
Expected Output:
Query: What are the health benefits of exercise?
Instruction: Prioritize recent medical research
Score: 0.8542 | Doc: A 2024 study shows exercise enhances cognitive function in older adults.
Score: 0.7891 | Doc: Regular exercise reduces risk of heart disease and improves mental health.
Score: 0.4123 | Doc: Ancient Greeks valued physical fitness for military training.
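Scores are read straight from the model's output logits (see the inference code below), so interpret them relatively: a higher score means the document is ranked as more relevant for the query. If you only need the best passages, a one-line sketch assuming the results list of (score, index, document) tuples that both inference functions build:

top_docs = [doc for _, _, doc in results[:2]]  # keep the two highest-scoring documents; results is sorted descending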
vLLM Usage (Recommended for Production)
Requires vllm==0.10.0 for NVFP4 or vllm>=0.8.5 for BF16.
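Install whichever build matches your checkpoint, e.g. pip install vllm==0.10.0 for NVFP4 or pip install "vllm>=0.8.5" for BF16.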
import os
os.environ['VLLM_USE_V1'] = '0'  # select the vLLM V0 engine (set before importing vllm)
import torch
from vllm import LLM, SamplingParams
def logits_processor(_, scores):
    """Custom logits processor for vLLM reranking."""
    index = scores[0].view(torch.uint16)
    scores = torch.full_like(scores, float("-inf"))
    scores[index] = 1
    return scores
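# How the trick works: "scores" are the next-token logits for one prompt. The logit
# at token id 0 is the model's relevance score; viewing its bfloat16 bits as uint16
# gives an integer index, and masking every other position to -inf forces greedy
# decoding to emit exactly that index as the token id. infer_w_vllm below reverses
# the cast (uint16 -> bfloat16) to recover the score.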
def format_prompts(query: str, instruction: str, documents: list[str]) -> list[str]:
    """Format query and documents into prompts for reranking."""
    if instruction:
        instruction = f" {instruction}"
    prompts = []
    for doc in documents:
        prompt = f"Check whether a given document contains information helpful to answer the query.\n<Document> {doc}\n<Query> {query}{instruction} ??"
        prompts.append(prompt)
    return prompts
def infer_w_vllm(model_path: str, query: str, instruction: str, documents: list[str]):
    model = LLM(
        model=model_path,
        gpu_memory_utilization=0.85,
        max_model_len=8192,
        dtype="bfloat16",
        max_logprobs=2,
        max_num_batched_tokens=262144,
    )
    sampling_params = SamplingParams(
        temperature=0,
        max_tokens=1,
        logits_processors=[logits_processor],
    )

    prompts = format_prompts(query, instruction, documents)
    outputs = model.generate(prompts, sampling_params, use_tqdm=False)

    results = []
    for i, output in enumerate(outputs):
        score = (
            torch.tensor([output.outputs[0].token_ids[0]], dtype=torch.uint16)
            .view(torch.bfloat16)
            .item()
        )
        results.append((score, i, documents[i]))

    results = sorted(results, key=lambda x: x[0], reverse=True)

    print(f"Query: {query}")
    print(f"Instruction: {instruction}")
    for score, doc_id, doc in results:
        print(f"Score: {score:.4f} | Doc: {doc}")
if __name__ == "__main__":
    model_path = "ContextualAI/ctxl-rerank-v2-instruct-multilingual-1b"
    query = "What are the health benefits of exercise?"
    instruction = "Prioritize recent medical research"
    documents = [
        "Regular exercise reduces risk of heart disease and improves mental health.",
        "A 2024 study shows exercise enhances cognitive function in older adults.",
        "Ancient Greeks valued physical fitness for military training.",
    ]
    infer_w_vllm(model_path, query, instruction, documents)
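The helper above constructs a new LLM instance and prints the ranking on every call. If you want ranked results back for downstream use (for example, feeding the top passages to a generator in a RAG pipeline), a small variation is sketched below; rerank_top_k is an illustrative helper, not part of the model card, and it reuses the imports, format_prompts, and logits_processor defined above.

def rerank_top_k(model: LLM, query: str, instruction: str, documents: list[str], k: int = 5) -> list[tuple[float, str]]:
    """Illustrative helper: return the top-k (score, document) pairs from an already-constructed LLM."""
    sampling_params = SamplingParams(
        temperature=0,
        max_tokens=1,
        logits_processors=[logits_processor],
    )
    prompts = format_prompts(query, instruction, documents)
    outputs = model.generate(prompts, sampling_params, use_tqdm=False)
    scored = []
    for doc, output in zip(documents, outputs):
        # Reinterpret the emitted token id as a bfloat16 relevance score (same trick as above).
        score = (
            torch.tensor([output.outputs[0].token_ids[0]], dtype=torch.uint16)
            .view(torch.bfloat16)
            .item()
        )
        scored.append((score, doc))
    return sorted(scored, key=lambda x: x[0], reverse=True)[:k]

Constructing the LLM once and reusing it across queries avoids reloading the weights on every request.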
Transformers Usage (Simpler Setup)
Requires transformers>=4.51.0 for BF16. Not supported for NVFP4.
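Installation is the standard pip flow, e.g. pip install "transformers>=4.51.0".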
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
def format_prompts(query: str, instruction: str, documents: list[str]) -> list[str]:
    """Format query and documents into prompts for reranking."""
    if instruction:
        instruction = f" {instruction}"
    prompts = []
    for doc in documents:
        prompt = f"Check whether a given document contains information helpful to answer the query.\n<Document> {doc}\n<Query> {query}{instruction} ??"
        prompts.append(prompt)
    return prompts
def infer_w_hf(model_path: str, query: str, instruction: str, documents: list[str]):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "left"

    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype).to(device)
    model.eval()

    prompts = format_prompts(query, instruction, documents)
    enc = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    input_ids = enc["input_ids"].to(device)
    attention_mask = enc["attention_mask"].to(device)

    with torch.no_grad():
        out = model(input_ids=input_ids, attention_mask=attention_mask)
        next_logits = out.logits[:, -1, :]
        scores_bf16 = next_logits[:, 0].to(torch.bfloat16)
        scores = scores_bf16.float().tolist()

    results = sorted(
        [(s, i, documents[i]) for i, s in enumerate(scores)],
        key=lambda x: x[0],
        reverse=True,
    )

    print(f"Query: {query}")
    print(f"Instruction: {instruction}")
    for score, doc_id, doc in results:
        print(f"Score: {score:.4f} | Doc: {doc}")
if __name__ == "__main__":
    model_path = "ContextualAI/ctxl-rerank-v2-instruct-multilingual-1b"
    query = "What are the health benefits of exercise?"
    instruction = "Prioritize recent medical research"
    documents = [
        "Regular exercise reduces risk of heart disease and improves mental health.",
        "A 2024 study shows exercise enhances cognitive function in older adults.",
        "Ancient Greeks valued physical fitness for military training.",
    ]
    infer_w_hf(model_path, query, instruction, documents)
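In infer_w_hf, truncation=True without an explicit max_length falls back to the tokenizer's model_max_length. For long documents you can cap the prompt length yourself to bound memory; a sketch of the same tokenizer call with an explicit limit (8192 is an arbitrary choice, not a recommendation from the model card):

enc = tokenizer(
    prompts,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=8192,  # illustrative cap; the model supports contexts up to 32K tokens
)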
Citation
If you use this model, please cite:
@misc{ctxl_rerank_v2_instruct_multilingual,
  title={Contextual AI Reranker v2},
  author={Halal, George and Agrawal, Sheshansh},
  year={2025},
  url={https://contextual.ai/blog/rerank-v2},
}
License
Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (cc-by-nc-sa-4.0)
Contact
For questions or issues, please open an issue on the model repository.