Update README.md
README.md CHANGED

```diff
@@ -18,6 +18,7 @@ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     "lightonai/MonoQwen2-VL-2B-LoRA-Reranker",
+    device_map="auto",
     # attn_implementation="flash_attention_2",
     # torch_dtype=torch.bfloat16,
 )
@@ -45,7 +46,7 @@ messages = [
 
 # Apply chat template and tokenize
 text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-inputs = processor(text=text, images=image, return_tensors="pt")
+inputs = processor(text=text, images=image, return_tensors="pt").to("cuda")
 
 # Run inference to obtain logits
 with torch.no_grad():
```
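Taken together, the two hunks fix a device mismatch: `device_map="auto"` lets accelerate place the model weights on the available GPU, and `.to("cuda")` moves the processed inputs onto the same device before the forward pass. Below is a minimal sketch of the snippet after this change; the prompt text and placeholder image are illustrative rather than the README's exact values, and `.to(model.device)` is a hypothetical variant of the committed `.to("cuda")` that also runs when the weights are placed on CPU.

```python
# Sketch of the updated README snippet (assumptions noted inline).
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "lightonai/MonoQwen2-VL-2B-LoRA-Reranker",
    device_map="auto",  # requires accelerate; places weights on the available device
    # attn_implementation="flash_attention_2",
    # torch_dtype=torch.bfloat16,
)

# Placeholder image and prompt for illustration; the README builds these
# from an actual query/document pair.
image = Image.new("RGB", (224, 224), "white")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Is this page relevant to the query? Answer True or False."},
        ],
    }
]

# Apply chat template and tokenize, then move inputs to the model's device
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=text, images=image, return_tensors="pt").to(model.device)

# Run inference to obtain logits
with torch.no_grad():
    logits = model(**inputs).logits
```

Hard-coding `"cuda"` matches the commit as written; the `model.device` form above is only a defensive variation for machines without a GPU.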