Commit 97f3f85 (verified) · 1 parent: df3c00d
zuminghuang committed: Update README.md

Files changed (1):
  1. README.md (+17 -9)
README.md CHANGED

@@ -19,29 +19,36 @@ Overview of Infinity-Parser training framework. Our model is optimized via reinf
 ## Inference
 
 ```python
+import torch
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
 from qwen_vl_utils import process_vision_info
 
 model_path = "infly/Infinity-Parser-7B"
 prompt = "Please transform the document’s contents into Markdown format."
 
-# default: Load the model on the available device(s)
+print(f"Loading model and processor...")
+# Default: Load the model on the available device(s)
+# model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+#     model_path, torch_dtype="auto", device_map="auto"
+# )
+
+# We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    model_path, torch_dtype="auto", device_map="auto"
+    model_path,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+    device_map="auto",
 )
 
-# We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
-# model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-#     model_path,
-#     torch_dtype=torch.bfloat16,
-#     attn_implementation="flash_attention_2",
-#     device_map="auto",
-# )
+# Default processor
+# processor = AutoProcessor.from_pretrained(model_path)
 
+# Recommended processor
 min_pixels = 256 * 28 * 28  # 448 * 448
 max_pixels = 2304 * 28 * 28  # 1344 * 1344
 processor = AutoProcessor.from_pretrained(model_path, min_pixels=min_pixels, max_pixels=max_pixels)
 
+print(f"Preparing messages for inference...")
 messages = [
     {
         "role": "user",
@@ -68,6 +75,7 @@ inputs = processor(
 )
 inputs = inputs.to("cuda")
 
+print(f"Generating results...")
 generated_ids = model.generate(**inputs, max_new_tokens=4096)
 generated_ids_trimmed = [
     out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
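
For context, both hunks are excerpts from the README's single inference example, which the diff truncates mid-pipeline (the `messages` list and the decoding step fall outside the changed lines). Below is a minimal end-to-end sketch of that pipeline following standard Qwen2.5-VL usage; the image path `"page.png"` and the final `batch_decode` call are illustrative assumptions, not part of this commit.

```python
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

model_path = "infly/Infinity-Parser-7B"
prompt = "Please transform the document’s contents into Markdown format."

# Load the model with flash_attention_2, as the updated README recommends.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)

# Pixel budgets are multiples of 28x28 visual patches:
# 256 * 28 * 28 == 448 * 448 and 2304 * 28 * 28 == 1344 * 1344.
processor = AutoProcessor.from_pretrained(
    model_path, min_pixels=256 * 28 * 28, max_pixels=2304 * 28 * 28
)

# "page.png" is a placeholder for the document image to parse.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "page.png"},
            {"type": "text", "text": prompt},
        ],
    }
]

# Standard Qwen2.5-VL preprocessing: chat template plus vision inputs.
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
inputs = inputs.to("cuda")

generated_ids = model.generate(**inputs, max_new_tokens=4096)
# Drop the prompt tokens, then decode only the newly generated Markdown.
generated_ids_trimmed = [
    out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(output_text[0])
```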