Update README.md
README.md CHANGED

```diff
@@ -18,6 +18,7 @@ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     "lightonai/MonoQwen2-VL-2B-LoRA-Reranker",
+    device_map="auto",
     # attn_implementation="flash_attention_2",
     # torch_dtype=torch.bfloat16,
 )
@@ -45,7 +46,7 @@ messages = [
 
 # Apply chat template and tokenize
 text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-inputs = processor(text=text, images=image, return_tensors="pt")
+inputs = processor(text=text, images=image, return_tensors="pt").to("cuda")
 
 # Run inference to obtain logits
 with torch.no_grad():
```
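Taken together, the two hunks fix a device mismatch: `device_map="auto"` lets accelerate place the model weights on the available GPU, and `.to("cuda")` moves the processed inputs onto the same device before the forward pass. Below is a minimal sketch of the snippet after this change; the prompt text and placeholder image are illustrative rather than the README's exact values, and `.to(model.device)` is a hypothetical variant of the committed `.to("cuda")` that also runs when the weights are placed on CPU.

```python
# Sketch of the updated README snippet (assumptions noted inline).
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "lightonai/MonoQwen2-VL-2B-LoRA-Reranker",
    device_map="auto",  # requires accelerate; places weights on the available device
    # attn_implementation="flash_attention_2",
    # torch_dtype=torch.bfloat16,
)

# Placeholder image and prompt for illustration; the README builds these
# from an actual query/document pair.
image = Image.new("RGB", (224, 224), "white")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Is this page relevant to the query? Answer True or False."},
        ],
    }
]

# Apply chat template and tokenize, then move inputs to the model's device
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=text, images=image, return_tensors="pt").to(model.device)

# Run inference to obtain logits
with torch.no_grad():
    logits = model(**inputs).logits
```

Hard-coding `"cuda"` matches the commit as written; the `model.device` form above is only a defensive variation for machines without a GPU.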