Inference using Python

#1
by sayeed111 - opened

To run the ONNX model using Python, what are the model's input and output formats, and which parameters need to be passed?

from PIL import Image, ImageDraw
import requests
from transformers import AutoProcessor
import torch
import onnxruntime as ort
import numpy as np

# Path to the downloaded ONNX model.
onnx_model_path = "model.onnx"  # Ensure this is the correct path
# Hugging Face repository the processor (image + text preprocessing) is loaded from.
hf_repo = "onnx-community/grounding-dino-tiny-ONNX"

# Text prompt for detection.
# Grounding DINO text queries should be lowercase and end with a period;
# several phrases can be combined as "a cat. a remote control.".
text_prompt = ["cat."]

# Load the image
image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# Use a timeout and an explicit status check so a network problem fails fast
# with a clear HTTP error instead of hanging or trying to decode an error page.
response = requests.get(image_url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw).convert("RGB")
# Original pixel size, used later to scale normalized boxes back to pixels.
img_width, img_height = image.size

# Load the processor and turn the image/text pair into model inputs
# (returned as PyTorch tensors because of return_tensors="pt").
processor = AutoProcessor.from_pretrained(hf_repo)
processor_inputs = processor(images=image, text=text_prompt, return_tensors="pt")

# Select execution providers.
# Ask ONNX Runtime which providers this installation actually offers rather
# than inferring it from PyTorch: a CUDA-enabled torch build does not imply
# that the GPU build of onnxruntime is installed. CUDA is preferred when
# present, with the CPU provider kept as a fallback.
available_providers = ort.get_available_providers()
providers = [
    name
    for name in ("CUDAExecutionProvider", "CPUExecutionProvider")
    if name in available_providers
]

# Convert the processor outputs to the NumPy arrays that InferenceSession.run
# takes as feeds. ONNX Runtime handles device placement itself, so the tensors
# do not need to be moved to the GPU first.
onnx_inputs = {k: v.detach().cpu().numpy() for k, v in processor_inputs.items()}

# Run inference
ort_session = ort.InferenceSession(onnx_model_path, providers=providers)
# Passing None as the output list requests every model output, in graph order.
outputs = ort_session.run(None, onnx_inputs)

# Post-process outputs
# NOTE(review): assumes the model returns exactly two outputs, ordered
# (logits, boxes) - confirm against ort_session.get_outputs().
logits, boxes = outputs
logits = torch.from_numpy(logits).sigmoid().numpy()  # Apply sigmoid to get probabilities
boxes = boxes[0]  # Drop the batch dimension -> [num_queries, 4]
# Drop the batch dimension -> [num_queries, num_scores].
# NOTE(review): for Grounding DINO the last axis is presumably one score per
# text token rather than per class - confirm.
logits = logits[0]

# Filter detections with confidence threshold
confidence_threshold = 0.3  # Adjust as needed
# Every kept detection is reported under the first prompt; drop the trailing
# period that Grounding DINO prompts use as a phrase terminator, if any.
detection_label = text_prompt[0].strip().rstrip(".")
detections = []
for score, box in zip(logits, boxes):
    # Best score for this query across the last axis, as a plain Python float.
    best_score = float(score.max())
    if best_score > confidence_threshold:
        detections.append(
            {
                "score": best_score,
                "label": detection_label,
                # Plain list so the printed result is readable.
                "box": box.tolist(),
            }
        )

# Draw bounding boxes on the image
draw = ImageDraw.Draw(image)
for detection in detections:
    score, label, box = detection["score"], detection["label"], detection["box"]
    # Box format: [x_center, y_center, width, height] (normalized)
    x_center, y_center, width, height = (float(v) for v in box)
    # Convert to pixel coordinates
    half_width, half_height = width / 2, height / 2
    x1 = (x_center - half_width) * img_width
    y1 = (y_center - half_height) * img_height
    x2 = (x_center + half_width) * img_width
    y2 = (y_center + half_height) * img_height
    # Draw rectangle and label
    draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
    # Keep the caption inside the image when the box touches the top edge.
    draw.text((x1, max(y1 - 10, 0)), f"{label}: {score:.2f}", fill="red")

# Save or display the image
image.save("output_image_model.jpg")

# image.show()  # Uncomment to display the image if you have a GUI environment

print(f"Detections: {detections}")

Sign up or log in to comment