navvew committed on
Commit
286aae7
·
verified ·
1 Parent(s): ede32cd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +54 -12
README.md CHANGED
@@ -17,6 +17,8 @@ This is the official model from the paper:
17
 
18
  📄 **[DocReRank: Single-Page Hard Negative Query Generation for Training Multi-Modal RAG Rerankers](https://arxiv.org/abs/2505.22584)**
19
 
 
 
20
  ---
21
 
22
  ## ✅ Model Overview
@@ -38,26 +40,66 @@ from peft import PeftModel
38
  import torch
39
  from PIL import Image
40
 
41
- # Load base model
42
  base_model = Qwen2VLForConditionalGeneration.from_pretrained(
43
  "Qwen/Qwen2-VL-2B-Instruct",
44
  torch_dtype=torch.bfloat16,
45
  device_map="cuda"
46
  )
47
 
48
- # Load DocReRank adapter
49
  model = PeftModel.from_pretrained(base_model, "DocReRank/DocReRank-Reranker").eval()
50
 
51
- # Load processor
52
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- # Example query and image
55
- query = "What is the total revenue in the table?"
56
- image = Image.open("sample_page.png")
57
-
58
- inputs = processor(text=query, images=image, return_tensors="pt").to("cuda", torch.bfloat16)
59
-
60
- with torch.no_grad():
61
- outputs = model.generate(**inputs, max_new_tokens=16)
62
 
63
- print(processor.tokenizer.decode(outputs[0], skip_special_tokens=True))
 
17
 
18
  📄 **[DocReRank: Single-Page Hard Negative Query Generation for Training Multi-Modal RAG Rerankers](https://arxiv.org/abs/2505.22584)**
19
 
20
+ See [Project Page](https://navvewas.github.io/DocReRank/) for more information.
21
+
22
  ---
23
 
24
  ## ✅ Model Overview
 
40
  import torch
41
  from PIL import Image
42
 
43
+ # ✅ Load base model
44
  base_model = Qwen2VLForConditionalGeneration.from_pretrained(
45
  "Qwen/Qwen2-VL-2B-Instruct",
46
  torch_dtype=torch.bfloat16,
47
  device_map="cuda"
48
  )
49
 
50
+ # ✅ Load DocReRank adapter
51
  model = PeftModel.from_pretrained(base_model, "DocReRank/DocReRank-Reranker").eval()
52
 
53
+ # ✅ Load processor
54
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
55
+ processor.image_processor.min_pixels = 200704
56
+ processor.image_processor.max_pixels = 589824
57
+
58
+ # ✅ Define query and images
59
+ query_text = "What are the performances of DocReRank model on restaurants and Biomedical benchmarks?"
60
+ # query_text = "Is there ablations results of DocReRank model?"
61
+
62
+ ### Paper pages to rank
63
+ image_paths = [ "DocReRank_paper_page_2.png", "DocReRank_paper_page_4.png", "DocReRank_paper_page_6.png", "DocReRank_paper_page_8.png"]
64
+
65
+ # ✅ Reranking prompt template
66
+ def compute_score(image_path, query_text):
67
+ image = Image.open(image_path)
68
+ prompt = f"Assert the relevance of the previous image document to the following query, answer True or False. The query is: {query_text}"
69
+ messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
70
+
71
+ # Tokenize
72
+ text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
73
+ inputs = processor(text=text, images=image, return_tensors="pt").to(model.device, torch.bfloat16)
74
+
75
+ # Compute logits
76
+ with torch.no_grad():
77
+ outputs = model(**inputs)
78
+ logits = outputs.logits[:, -1, :]
79
+ true_id = processor.tokenizer.convert_tokens_to_ids("True")
80
+ false_id = processor.tokenizer.convert_tokens_to_ids("False")
81
+ probs = torch.softmax(logits[:, [true_id, false_id]], dim=-1)
82
+ relevance_score = probs[0, 0].item() # Probability of "True"
83
+
84
+ return relevance_score
85
+
86
+ # ✅ Compute scores for all images
87
+ scores = [(img, compute_score(img, query_text)) for img in image_paths]
88
+
89
+ # ✅ Print results
90
+ for img, score in scores:
91
+ print(f"Image: {img} | Relevance Score: {score:.4f}")
92
+ ```
93
+
94
+ ## Citation
95
+ If you use this model, please cite:
96
+ ```bibtex
97
+ @article{wasserman2025docrerank,
98
+ title={DocReRank: Single-Page Hard Negative Query Generation for Training Multi-Modal RAG Rerankers},
99
+ author={Wasserman, Navve and Heinimann, Oliver and Golbari, Yuval and Zimbalist, Tal and Schwartz, Eli and Irani, Michal},
100
+ journal={arXiv preprint arXiv:2505.22584},
101
+ year={2025}
102
+ }
103
+ ```
104
 
 
 
 
 
 
 
 
 
105