Update README.md

This is the official model from the paper:

**[DocReRank: Single-Page Hard Negative Query Generation for Training Multi-Modal RAG Rerankers](https://arxiv.org/abs/2505.22584)**

See the [Project Page](https://navvewas.github.io/DocReRank/) for more information.

---

## Model Overview

```python
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from peft import PeftModel
import torch
from PIL import Image

# Load base model
base_model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="cuda"
)

# Load DocReRank adapter
model = PeftModel.from_pretrained(base_model, "DocReRank/DocReRank-Reranker").eval()

# Load processor; constrain each page image to between 448x448 (200704)
# and 768x768 (589824) pixels
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
processor.image_processor.min_pixels = 200704
processor.image_processor.max_pixels = 589824

# Define the query
query_text = "What is the performance of the DocReRank model on the Restaurant and Biomedical benchmarks?"
# query_text = "Are there ablation results for the DocReRank model?"

# Paper pages to rank
image_paths = ["DocReRank_paper_page_2.png", "DocReRank_paper_page_4.png",
               "DocReRank_paper_page_6.png", "DocReRank_paper_page_8.png"]

# Score one page against the query with the reranking prompt template
def compute_score(image_path, query_text):
    image = Image.open(image_path)
    prompt = f"Assert the relevance of the previous image document to the following query, answer True or False. The query is: {query_text}"
    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]

    # Tokenize
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=text, images=image, return_tensors="pt").to(model.device, torch.bfloat16)

    # Compute logits at the last position and compare the "True" and "False" tokens
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits[:, -1, :]
    true_id = processor.tokenizer.convert_tokens_to_ids("True")
    false_id = processor.tokenizer.convert_tokens_to_ids("False")
    probs = torch.softmax(logits[:, [true_id, false_id]], dim=-1)
    relevance_score = probs[0, 0].item()  # Probability of "True"

    return relevance_score

# Compute scores for all pages
scores = [(img, compute_score(img, query_text)) for img in image_paths]

# Print results
for img, score in scores:
    print(f"Image: {img} | Relevance Score: {score:.4f}")
```
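
Each score is the probability mass the model assigns to answering "True" rather than "False", renormalized over just those two tokens, so scores are directly comparable across pages. In a RAG pipeline you would typically use them to reorder the candidates returned by a first-stage retriever; here is a minimal sketch building on the `scores` list above (the `top_k` value is an illustrative choice, not from the original README):

```python
# Sort candidate pages by descending relevance score
ranked = sorted(scores, key=lambda item: item[1], reverse=True)

# Keep only the highest-scoring pages for the generation stage
top_k = 2  # illustrative value; tune for your pipeline
for img, score in ranked[:top_k]:
    print(f"Selected page: {img} (score {score:.4f})")
```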

## Citation

If you use this model, please cite:

```bibtex
@article{wasserman2025docrerank,
  title={DocReRank: Single-Page Hard Negative Query Generation for Training Multi-Modal RAG Rerankers},
  author={Wasserman, Navve and Heinimann, Oliver and Golbari, Yuval and Zimbalist, Tal and Schwartz, Eli and Irani, Michal},
  journal={arXiv preprint arXiv:2505.22584},
  year={2025}
}
```
|