Inference using Python

by sayeed111 - opened Mar 12

Discussion

sayeed111

Mar 12

To run the ONNX model using Python, what are the input and output formats of the model, and which parameters need to be passed?

fatemehsaveh

Aug 9

from PIL import Image, ImageDraw
import requests
from transformers import AutoProcessor
import torch
import onnxruntime as ort
import numpy as np

# Path to the downloaded ONNX model
onnx_model_path = "model.onnx"  # Ensure this is the correct path
# Hub repository the matching processor (image + text preprocessing) is loaded from.
hf_repo = "onnx-community/grounding-dino-tiny-ONNX"

# Text prompt for detection
# The first entry is also used as the label attached to every detection.
# NOTE(review): the Grounding DINO documentation recommends lowercase queries
# that end with a period (e.g. "cat."); confirm whether this prompt should
# follow that format for the processor version in use.
text_prompt = ["cat"]

# Load the image
image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# Bound the request with a timeout so a stalled connection cannot hang the
# script, and close the streamed response once the pixels have been read.
with requests.get(image_url, stream=True, timeout=30) as response:
    # Fail loudly on HTTP errors instead of handing an error page to PIL.
    response.raise_for_status()
    # The raw stream is not content-decoded by default; enable decoding so a
    # gzip/deflate-encoded body still parses as an image.
    response.raw.decode_content = True
    # convert() forces the image data to be read while the stream is still open.
    image = Image.open(response.raw).convert("RGB")
# Pixel dimensions, used later to scale normalized boxes back to the image.
img_width, img_height = image.size

# Load the processor
# The processor is fetched from the same Hub repository as the ONNX export.
processor = AutoProcessor.from_pretrained(hf_repo)
# Preprocess the image and the text prompt together; "pt" requests PyTorch
# tensors, which are converted to NumPy arrays before being fed to ONNX Runtime.
processor_inputs = processor(images=image, text=text_prompt, return_tensors="pt")

# Set device and providers
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# PyTorch seeing a GPU does not guarantee that this ONNX Runtime install ships
# the CUDA execution provider (the CPU-only wheel does not), so ask ONNX Runtime
# what it actually offers before requesting it.
available_providers = ort.get_available_providers()
if device.type == "cuda" and "CUDAExecutionProvider" in available_providers:
    # Providers are tried in order; keep the CPU provider as a fallback.
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
else:
    providers = ["CPUExecutionProvider"]

# Convert inputs to NumPy arrays for ONNX Runtime
# session.run() is fed host-side NumPy arrays, so the tensors are converted
# directly. Copying them to the GPU first only to copy them straight back with
# .cpu() would be wasted work; the execution provider handles device placement.
onnx_inputs = {
    name: tensor.detach().cpu().numpy()
    for name, tensor in processor_inputs.items()
}

# Run inference
ort_session = ort.InferenceSession(onnx_model_path, providers=providers)
# Feed only the inputs the exported graph declares: an unexpected feed name
# makes run() fail, and the processor may return more keys than the model takes.
expected_input_names = {node.name for node in ort_session.get_inputs()}
feed = {
    name: array
    for name, array in onnx_inputs.items()
    if name in expected_input_names
}
# Passing None as the output list requests every output of the model.
outputs = ort_session.run(None, feed)

# Post-process outputs
# The model is expected to return exactly two arrays, in this order.
raw_logits, raw_boxes = outputs
# Apply sigmoid to turn the raw logits into independent scores in [0, 1].
probabilities = torch.from_numpy(raw_logits).sigmoid().numpy()
# Drop the batch axis; a single image was processed.
boxes = raw_boxes[0]  # Shape: [num_queries, 4]
# NOTE(review): the original note gives this shape as [num_queries, num_classes];
# for Grounding DINO the last axis is presumably indexed by text token rather
# than by class -- confirm before mapping columns to labels.
logits = probabilities[0]

# Filter detections with confidence threshold
confidence_threshold = 0.3  # Adjust as needed
detections = []
# Rows of `logits` and `boxes` are paired up one query at a time.
for query_scores, box in zip(logits, boxes):
    # A query's confidence is its highest score along the last axis.
    best_score = query_scores.max()
    if best_score > confidence_threshold:
        # Only one prompt is used, so every kept query gets that prompt as label.
        detections.append({"score": best_score, "label": text_prompt[0], "box": box})

# Draw bounding boxes on the image
draw = ImageDraw.Draw(image)
for detection in detections:
    score, label, box = detection["score"], detection["label"], detection["box"]
    # Box format: [x_center, y_center, width, height] (normalized)
    x_center, y_center, width, height = (float(value) for value in box)
    # Convert to pixel corner coordinates
    half_width, half_height = width / 2, height / 2
    x1 = (x_center - half_width) * img_width
    y1 = (y_center - half_height) * img_height
    x2 = (x_center + half_width) * img_width
    y2 = (y_center + half_height) * img_height
    # Draw rectangle and label
    draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
    # Place the label just above the box, but never above the top edge of the
    # image, where it would be invisible for boxes that touch the top.
    label_y = max(y1 - 10, 0)
    draw.text((x1, label_y), f"{label}: {score:.2f}", fill="red")

# Save or display the image
image.save("output_image_model.jpg")
# image.show()  # Uncomment to display the image if you have a GUI environment

# Report the kept detections (score, label and normalized box for each).
print(f"Detections: {detections}")

Upload images, audio, and videos by dragging in the text input, pasting, or clicking here.

Tap or paste here to upload images

· Sign up or log in to comment