# qwen-od / app.py
import re
from io import BytesIO

import gradio as gr
import requests
from huggingface_hub import InferenceClient
from PIL import Image

# Initialize the Hugging Face Inference Client; it picks up an HF token
# from the HF_TOKEN environment variable or the cached login, if present.
client = InferenceClient(provider="hf-inference")

# Pattern capturing bounding box coordinates and the object label,
# e.g. "<box>(10,20,110,220):cat</box>"
BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"


def parse_bounding_boxes(text):
    """
    Parse bounding boxes and object labels from the model response.
    Expected format: <box>(x1,y1,x2,y2):object_label</box>
    """
    bboxes = []
    for x1, y1, x2, y2, label in re.findall(BOX_TAG_PATTERN, text):
        coords = tuple(map(int, (x1, y1, x2, y2)))  # coordinates as ints
        bboxes.append((coords, label.strip()))  # label stays a string
    return bboxes
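
# Illustrative round trip (hypothetical coordinates and label):
#   parse_bounding_boxes("Found a <box>(10,20,110,220):cat</box> here")
#   -> [((10, 20, 110, 220), "cat")]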


def fetch_image(image_url):
    """
    Fetch the image from the URL and return a PIL Image object.
    """
    try:
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert("RGB")
    except Exception as e:
        raise ValueError(f"Failed to fetch image from URL: {e}") from e
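
# Usage sketch (hypothetical URL): fetch_image("https://example.com/cat.jpg")
# returns an RGB PIL.Image, or raises ValueError on any download/decode error.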


def predict(image_url, prompt):
    """
    Process the image URL and prompt; return annotated image data for Gradio.
    """
    try:
        # Validate and fetch the image (also used as the annotation backdrop)
        image = fetch_image(image_url)

        # Call the Hugging Face Inference API with a streaming chat request
        stream = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-32B-Instruct",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ],
            stream=True,
        )

        # Accumulate the streamed response; delta.content can be None
        response_text = ""
        for chunk in stream:
            response_text += chunk.choices[0].delta.content or ""

        # Parse bounding boxes and labels from the full response
        bboxes = parse_bounding_boxes(response_text)
        if not bboxes:
            return None, "No bounding boxes or objects detected."

        # gr.AnnotatedImage expects (image, [(bbox, label), ...]),
        # which is exactly the shape parse_bounding_boxes returns
        return (image, bboxes), "Success: Objects detected and annotated."
    except Exception as e:
        return None, f"Error: {e}"


# Gradio interface
def create_gradio_interface():
    with gr.Blocks(title="Object Detection Demo") as demo:
        gr.Markdown("# Object Detection with Bounding Boxes")
        gr.Markdown(
            "Provide an image URL and a prompt to detect objects "
            "and display bounding boxes."
        )
        with gr.Row():
            with gr.Column():
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="Enter a publicly accessible image URL",
                )
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder=(
                        "e.g., 'Detect and label all objects in the image "
                        "with bounding boxes.'"
                    ),
                    lines=3,
                )
                submit_btn = gr.Button("Run Detection")
            with gr.Column():
                output_image = gr.AnnotatedImage(label="Detected Objects")
                status = gr.Textbox(label="Status", interactive=False)
        submit_btn.click(
            fn=predict,
            inputs=[image_url, prompt],
            outputs=[output_image, status],
        )
    return demo


# Launch the demo
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch()