lightonai
/

MonoQwen2-VL-v0.1

@@ -16,30 +16,50 @@ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 # Load processor and model
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
-model = Qwen2VLForConditionalGeneration.from_pretrained("lightonai/MonoQwen2-VL-2B-LoRA-Reranker")
-# Define the query and the image
-query = "What is the value of the thing in the document"
-image = Image.open("path_to_image.jpg")
-# Prepare the inputs
-prompt = f"Assert the relevance of the previous image document to the following query, answer True or False. The query is: {query}"
-inputs = processor(text=prompt, images=image, return_tensors="pt")
-# Run the model and obtain results
 with torch.no_grad():
     outputs = model(**inputs)
-    logits = outputs.logits
-    logits_for_last_token = logits[:, -1, :]
-    true_token_id = processor.tokenizer.convert_tokens_to_ids("True")
-    false_token_id = processor.tokenizer.convert_tokens_to_ids("False")
-    relevance_score = torch.softmax(logits_for_last_token[:, [true_token_id, false_token_id]], dim=-1)
-# Print the True/False probabilities
-true_prob = relevance_score[:, 0].item()
-false_prob = relevance_score[:, 1].item()
-print(f"True probability: {true_prob}, False probability: {false_prob}")
 ```
 This example demonstrates how to use the model to assess the relevance of an image with respect to a query. It outputs the probability that the image is relevant ("True") or not relevant ("False").

 # Load processor and model
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "lightonai/MonoQwen2-VL-2B-LoRA-Reranker",
+)
+# Define query and load image
+query = "Is this your query about a document ?"
+image_path = "your/path/to/image.png"
+image = Image.open(image_path)
+# Construct the prompt and prepare input
+prompt = (
+    "Assert the relevance of the previous image document to the following query, "
+    "answer True or False. The query is: {query}"
+).format(query=query)
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "image": image},
+            {"type": "text", "text": prompt},
+        ],
+    }
+]
+# Apply chat template and tokenize
+text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+inputs = processor(text=text, images=image, return_tensors="pt").to("cuda:1")
+# Run inference to obtain logits
 with torch.no_grad():
     outputs = model(**inputs)
+    logits_for_last_token = outputs.logits[:, -1, :]
+# Convert tokens and calculate relevance score
+true_token_id = processor.tokenizer.convert_tokens_to_ids("True")
+false_token_id = processor.tokenizer.convert_tokens_to_ids("False")
+relevance_score = torch.softmax(logits_for_last_token[:, [true_token_id, false_token_id]], dim=-1)
+# Extract and display probabilities
+true_prob = relevance_score[0, 0].item()
+false_prob = relevance_score[0, 1].item()
+print(f"True probability: {true_prob:.4f}, False probability: {false_prob:.4f}")
 ```
 This example demonstrates how to use the model to assess the relevance of an image with respect to a query. It outputs the probability that the image is relevant ("True") or not relevant ("False").