import os
import re
import gradio as gr
from huggingface_hub import InferenceClient
import requests
from io import BytesIO
from PIL import Image

# Initialize the Hugging Face Inference Client.
# Assumes a valid Hugging Face access token in the HF_TOKEN environment variable.
client = InferenceClient(provider="hf-inference", token=os.environ.get("HF_TOKEN"))

# Pattern to capture bounding box coordinates and the object label
BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"
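# Example of a string this pattern matches (illustrative; the exact tag format
# the model emits depends on how the prompt asks for boxes):
#   <box>(10,20,110,220):cat</box>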


def parse_bounding_boxes(text):
    """
    Parse bounding boxes and object labels from the model response.
    Expected format: <box>(x1,y1,x2,y2):object_label</box>
    """
    matches = re.findall(BOX_TAG_PATTERN, text)
    bboxes = []
    for x1, y1, x2, y2, label in matches:
        # re.findall already yields strings; only the coordinates need conversion
        coords = tuple(map(int, (x1, y1, x2, y2)))
        bboxes.append((coords, label.strip()))
    return bboxes
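
# Illustrative example: parsing a response such as
#   "Found: <box>(10,20,110,220):cat</box> and <box>(30,40,50,60):dog</box>"
# yields [((10, 20, 110, 220), "cat"), ((30, 40, 50, 60), "dog")].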


def fetch_image(image_url):
    """
    Fetch the image from the URL and return a PIL Image object.
    """
    try:
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert("RGB")
    except Exception as e:
        raise ValueError(f"Failed to fetch image from URL: {e}") from e
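
# Note: predict() below also passes the raw URL straight to the inference
# provider, so this local fetch serves two purposes: validating the URL early
# and producing the PIL image that the annotations are drawn on.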


def predict(image_url, prompt):
    """
    Process the image URL and prompt; return annotated image data.
    """
    try:
        # Validate and fetch the image
        image = fetch_image(image_url)
        # Call the Hugging Face Inference API (chat completion with an image input)
        stream = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-32B-Instruct",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ],
            stream=True,
        )
        response_text = ""
        for chunk in stream:
            # delta.content can be None on some chunks (e.g., the final one),
            # so fall back to an empty string to avoid a TypeError
            response_text += chunk.choices[0].delta.content or ""
        # Parse bounding boxes and labels
        bboxes = parse_bounding_boxes(response_text)
        if not bboxes:
            return None, "No bounding boxes or objects detected."
        # bboxes is already in the (bbox, label) format gr.AnnotatedImage expects
        return (image, bboxes), "Success: Objects detected and annotated."
    except Exception as e:
        return None, f"Error: {e}"


# Gradio Interface
def create_gradio_interface():
    with gr.Blocks(title="Object Detection Demo") as demo:
        gr.Markdown("# Object Detection with Bounding Boxes")
        gr.Markdown("Provide an image URL and a prompt to detect objects and display bounding boxes.")
        with gr.Row():
            with gr.Column():
                image_url = gr.Textbox(label="Image URL", placeholder="Enter a publicly accessible image URL")
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="e.g., 'Detect and label all objects in the image with bounding boxes.'",
                    lines=3,
                )
                submit_btn = gr.Button("Run Detection")
            with gr.Column():
                output_image = gr.AnnotatedImage(label="Detected Objects")
                status = gr.Textbox(label="Status", interactive=False)
        submit_btn.click(
            fn=predict,
            inputs=[image_url, prompt],
            outputs=[output_image, status],
        )
    return demo


# Launch the demo
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch()
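
# To run locally (illustrative): install the dependencies with
#   pip install gradio huggingface_hub requests pillow
# export a valid HF_TOKEN, then start the app with `python app.py`
# (the filename is an assumption; Hugging Face Spaces conventionally use app.py).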