rt-detr-object-detection

Running on Zero

App Files Files Community

rt-detr-object-detection / app.py

freddyaboulton HF Staff

Try compiled

082831f over 1 year ago

raw

history blame

3.05 kB

	import spaces
	import gradio as gr
	import cv2
	import tempfile
	from PIL import Image, ImageDraw, ImageFont
	from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
	import torch

	# Load the RT-DETR preprocessor and the detection model. The model is placed on
	# the GPU in half precision and wrapped with torch.compile.
	image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
	model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd", torch_dtype=torch.float16).to("cuda")
	model = torch.compile(model, mode="reduce-overhead")

	# Compile by running inference once at import time, so the compilation cost is
	# not paid by the first streamed frame. The image processor expects an image
	# object rather than a file path, so the sample file is opened with PIL first
	# (and converted to RGB in case the PNG carries an alpha channel).
	with Image.open("bus.png") as warmup_image:
	    inputs = image_processor(images=warmup_image.convert("RGB"), return_tensors="pt").to(
	        "cuda", torch.float16
	    )
	with torch.no_grad():
	    outputs = model(**inputs)

	def draw_bounding_boxes(image, results, model, threshold=0.3):
	    """Annotate ``image`` in place with every detection scoring above ``threshold``.

	    Args:
	        image: Image handed to ``ImageDraw.Draw``; it is drawn on directly.
	        results: Iterable of mappings, each providing parallel ``"scores"``,
	            ``"labels"`` and ``"boxes"`` sequences.
	        model: Object whose ``config.id2label`` maps a label id to its name.
	        threshold: Detections whose score is not greater than this are skipped.

	    Returns:
	        The same ``image`` object that was passed in.
	    """
	    canvas = ImageDraw.Draw(image)
	    for detections in results:
	        triples = zip(detections["scores"], detections["labels"], detections["boxes"])
	        for confidence, class_id, corners in triples:
	            # Skip anything that does not clear the threshold.
	            if not confidence > threshold:
	                continue
	            name = model.config.id2label[class_id.item()]
	            # Round the box coordinates to whole pixels before drawing.
	            rect = [round(value) for value in corners.tolist()]
	            canvas.rectangle(rect, outline="red", width=3)
	            # Caption sits at the first corner of the box.
	            anchor = (rect[0], rect[1])
	            canvas.text(anchor, f"{name}: {confidence:.2f}", fill="red")
	    return image

	import time

	@spaces.GPU
	def inference(image, conf_threshold):
	    """Run object detection on one frame and return it with boxes drawn on.

	    Args:
	        image: The frame to analyse; its ``size`` attribute is read to scale
	            boxes back to the frame's dimensions (the UI supplies PIL images).
	        conf_threshold: Minimum score a detection needs to be kept and drawn.

	    Returns:
	        The input ``image`` with the surviving detections drawn onto it.
	    """
	    # The model was moved to CUDA in float16 at load time, so the preprocessed
	    # tensors must be moved/cast the same way (as the warm-up call does);
	    # otherwise the forward pass fails on a device/dtype mismatch.
	    inputs = image_processor(images=image, return_tensors="pt").to("cuda", torch.float16)

	    start = time.time()
	    with torch.no_grad():
	        outputs = model(**inputs)

	    # image.size is (width, height); post-processing wants (height, width).
	    results = image_processor.post_process_object_detection(
	        outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=conf_threshold
	    )
	    end = time.time()
	    print("time: ", end - start)

	    bbs = draw_bounding_boxes(image, results, model, threshold=conf_threshold)
	    print("bbs: ", time.time() - end)
	    return bbs


	# Custom CSS handed to gr.Blocks. ".my-group" caps the element at 600px in both
	# dimensions; ".my-column" centres its children with flexbox.
	# Lengths need a unit ("600" alone is an invalid declaration that browsers
	# drop), and the terminating semicolon belongs inside the braces.
	css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
	.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""

	# Build the UI: a title, links to the paper and code, a webcam image component
	# and a confidence slider. Webcam frames are streamed through `inference` and
	# the annotated frame is written back into the same image component.
	with gr.Blocks(css=css) as app:
	    gr.HTML(
	        """
	<h1 style='text-align: center'>
	Near Real-Time Webcam Stream with RT-DETR
	</h1>
	"""
	    )
	    # Plain "|" separator: "\|" is not a valid Python escape sequence and would
	    # also show a stray backslash on the page.
	    gr.HTML(
	        """
	<h3 style='text-align: center'>
	<a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a> | <a href='https://github.com/lyuwenyu/RT-DETR' target='_blank'>github</a>
	</h3>
	"""
	    )
	    with gr.Column(elem_classes=["my-column"]):
	        with gr.Group(elem_classes=["my-group"]):
	            image = gr.Image(
	                type="pil",
	                label="Image",
	                sources="webcam",
	            )
	            conf_threshold = gr.Slider(
	                label="Confidence Threshold",
	                minimum=0.0,
	                maximum=1.0,
	                step=0.05,
	                value=0.85,
	            )
	        # The image component is both the input and the output of the stream.
	        image.stream(
	            fn=inference,
	            inputs=[image, conf_threshold],
	            outputs=[image],
	            stream_every=0.1,
	            time_limit=30,
	        )

	# Only start the server when the file is executed as a script.
	if __name__ == "__main__":
	    app.launch()