import os
import re
import gradio as gr
from huggingface_hub import InferenceClient
import requests
from io import BytesIO
from PIL import Image

# Initialize the Hugging Face Inference Client.
# Assumes a valid Hugging Face access token in the HF_TOKEN environment variable.
client = InferenceClient(provider="hf-inference", token=os.environ.get("HF_TOKEN"))

# Pattern to capture bounding box coordinates and the object label
BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"
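# Example of a string this pattern matches (illustrative; the exact tag format
# the model emits depends on how the prompt asks for boxes):
#   <box>(10,20,110,220):cat</box>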


def parse_bounding_boxes(text):
    """
    Parse bounding boxes and object labels from the model response.
    Expected format: <box>(x1,y1,x2,y2):object_label</box>
    """
    matches = re.findall(BOX_TAG_PATTERN, text)
    bboxes = []
    for x1, y1, x2, y2, label in matches:
        # re.findall already yields strings; only the coordinates need conversion
        coords = tuple(map(int, (x1, y1, x2, y2)))
        bboxes.append((coords, label.strip()))
    return bboxes
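
# Illustrative example: parsing a response such as
#   "Found: <box>(10,20,110,220):cat</box> and <box>(30,40,50,60):dog</box>"
# yields [((10, 20, 110, 220), "cat"), ((30, 40, 50, 60), "dog")].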


def fetch_image(image_url):
    """
    Fetch the image from the URL and return a PIL Image object.
    """
    try:
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert("RGB")
    except Exception as e:
        raise ValueError(f"Failed to fetch image from URL: {e}") from e
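
# Note: predict() below also passes the raw URL straight to the inference
# provider, so this local fetch serves two purposes: validating the URL early
# and producing the PIL image that the annotations are drawn on.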


def predict(image_url, prompt):
    """
    Process the image URL and prompt; return annotated image data.
    """
    try:
        # Validate and fetch the image
        image = fetch_image(image_url)
        # Call the Hugging Face Inference API (chat completion with an image input)
        stream = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-32B-Instruct",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ],
            stream=True,
        )
        response_text = ""
        for chunk in stream:
            # delta.content can be None on some chunks (e.g., the final one),
            # so fall back to an empty string to avoid a TypeError
            response_text += chunk.choices[0].delta.content or ""
        # Parse bounding boxes and labels
        bboxes = parse_bounding_boxes(response_text)
        if not bboxes:
            return None, "No bounding boxes or objects detected."
        # bboxes is already in the (bbox, label) format gr.AnnotatedImage expects
        return (image, bboxes), "Success: Objects detected and annotated."
    except Exception as e:
        return None, f"Error: {e}"


# Gradio Interface
def create_gradio_interface():
    with gr.Blocks(title="Object Detection Demo") as demo:
        gr.Markdown("# Object Detection with Bounding Boxes")
        gr.Markdown("Provide an image URL and a prompt to detect objects and display bounding boxes.")
        with gr.Row():
            with gr.Column():
                image_url = gr.Textbox(label="Image URL", placeholder="Enter a publicly accessible image URL")
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="e.g., 'Detect and label all objects in the image with bounding boxes.'",
                    lines=3,
                )
                submit_btn = gr.Button("Run Detection")
            with gr.Column():
                output_image = gr.AnnotatedImage(label="Detected Objects")
                status = gr.Textbox(label="Status", interactive=False)
        submit_btn.click(
            fn=predict,
            inputs=[image_url, prompt],
            outputs=[output_image, status],
        )
    return demo


# Launch the demo
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch()
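
# To run locally (illustrative): install the dependencies with
#   pip install gradio huggingface_hub requests pillow
# export a valid HF_TOKEN, then start the app with `python app.py`
# (the filename is an assumption; Hugging Face Spaces conventionally use app.py).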