TangYiJay committed
Commit de0a685 · verified · 1 Parent(s): ff494c0
Files changed (1)
app.py +31 -56
app.py CHANGED
@@ -1,63 +1,38 @@
-import gradio as gr
-from transformers import AutoProcessor, LlavaForConditionalGeneration
+from transformers import AutoProcessor, AutoModelForVision2Seq
 from PIL import Image
 import torch
+import gradio as gr
 
-MODEL_ID = "liuhaotian/llava-v1.6-vicuna-7b"
+MODEL_ID = "HuggingFaceM4/idefics2-8b"
 
-# Load model and processor (use correct classes)
-processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
-model = LlavaForConditionalGeneration.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    low_cpu_mem_usage=True,
-    trust_remote_code=True
-)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
-
-base_image = None
-
-def set_base(image):
-    global base_image
-    base_image = image
-    return "✅ Base image has been set."
-
-def detect_object(image, prompt):
-    if base_image is None:
-        return "⚠️ Please upload a base image first."
-
-    query = (
-        f"Ignore the base image and only analyze the differences. "
-        f"{prompt or 'Detect new objects and identify their material type.'}"
-    )
-
-    inputs = processor(
-        text=query,
-        images=[base_image, image],
-        return_tensors="pt"
-    ).to(device, torch.float16 if torch.cuda.is_available() else torch.float32)
-
-    output = model.generate(**inputs, max_new_tokens=256)
+# Load model and processor
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+model = AutoModelForVision2Seq.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
+
+def analyze_images(base_img, target_img, user_prompt):
+    if base_img is None or target_img is None:
+        return "Please upload both a base image and a target image."
+
+    images = [base_img, target_img]
+    prompt = f"Ignore the first image (base image). Analyze the second image: {user_prompt}"
+
+    inputs = processor(images=images, text=prompt, return_tensors="pt").to(model.device)
+    output = model.generate(**inputs, max_new_tokens=200)
+
     result = processor.decode(output[0], skip_special_tokens=True)
     return result
 
-with gr.Blocks(title="LLaVA Object Detector") as demo:
-    gr.Markdown("## 🧠 LLaVA 1.6 Vicuna-7B — Visual Detection & Material Identification")
-
-    with gr.Row():
-        with gr.Column():
-            base_img = gr.Image(label="Base Image", type="pil")
-            set_base_btn = gr.Button("Set as Base Image")
-            base_status = gr.Textbox(label="Status")
-
-        with gr.Column():
-            target_img = gr.Image(label="Detection Image", type="pil")
-            prompt = gr.Textbox(label="Instruction", placeholder="Detect new objects and describe material")
-            run_btn = gr.Button("Run Detection")
-            output_box = gr.Textbox(label="Output")
-
-    set_base_btn.click(set_base, inputs=base_img, outputs=base_status)
-    run_btn.click(detect_object, inputs=[target_img, prompt], outputs=output_box)
-
-demo.launch()
+demo = gr.Interface(
+    fn=analyze_images,
+    inputs=[
+        gr.Image(type="pil", label="Base Image"),
+        gr.Image(type="pil", label="Target Image"),
+        gr.Textbox(label="Prompt", placeholder="Describe what to analyze...")
+    ],
+    outputs=gr.Textbox(label="Model Output"),
+    title="Image Comparison with IDEFICS2-8B",
+    description="Upload two images. The model will ignore the base image and analyze the target image according to your prompt."
+)
+
+if __name__ == "__main__":
+    demo.launch()
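A note on the new analyze_images: as committed, the raw prompt string contains no image placeholder tokens, and the IDEFICS2 processor matches images to those tokens in the text, so this call is likely to fail (or mispair the images) at inference time. The model card for HuggingFaceM4/idefics2-8b builds prompts through apply_chat_template instead. Below is a minimal sketch of how the body of analyze_images could do the same, assuming the two-image, one-prompt setup and the processor/model objects from the committed code; this is a suggested variant, not part of the commit.

def analyze_images(base_img, target_img, user_prompt):
    if base_img is None or target_img is None:
        return "Please upload both a base image and a target image."

    # One {"type": "image"} entry per image; apply_chat_template expands
    # these into the <image> placeholders the IDEFICS2 processor expects.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "image"},
            {"type": "text",
             "text": f"Ignore the first image (base image). "
                     f"Analyze the second image: {user_prompt}"},
        ],
    }]
    text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=text, images=[base_img, target_img],
                       return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=200)

    # Decode only the newly generated tokens so the reply does not echo the prompt.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return processor.decode(new_tokens, skip_special_tokens=True)

Slicing off the prompt tokens before decoding also matters here: generate returns the prompt and the completion as one sequence, so the committed processor.decode(output[0], ...) would show the instruction echoed back in the Gradio textbox before the answer.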