Spaces:

biennh
/

head-pose-estimation

Runtime error

App Files Files Community

head-pose-estimation / demo.py

bienom

add model

cde7e09 over 3 years ago

raw

history blame contribute delete

4.67 kB

	from face_detection import RetinaFace
	from model import SixDRepNet
	import math
	import re
	from matplotlib import pyplot as plt
	import sys
	import os
	import argparse

	import numpy as np
	import cv2
	import matplotlib.pyplot as plt
	from numpy.lib.function_base import _quantile_unchecked

	import torch
	import torch.nn as nn
	from torch.utils.data import DataLoader
	from torchvision import transforms
	import torch.backends.cudnn as cudnn
	import torchvision
	import torch.nn.functional as F
	import utils
	import matplotlib
	from PIL import Image
	import time
	matplotlib.use('TkAgg')


	def parse_args(argv=None):
	    """Parse the command-line arguments of the demo.

	    Args:
	        argv: Optional list of argument strings. When ``None`` (the
	            default) argparse reads ``sys.argv[1:]``, which is what the
	            script entry point relies on.

	    Returns:
	        argparse.Namespace with the attributes ``gpu_id`` (int),
	        ``cam_id`` (int), ``snapshot`` (str) and ``save_viz`` (bool).
	    """
	    def _str2bool(value):
	        # ``type=bool`` would turn every non-empty string -- including
	        # "False" -- into True, so the flag is parsed explicitly instead.
	        if isinstance(value, bool):
	            return value
	        lowered = value.strip().lower()
	        if lowered in ('true', 't', 'yes', 'y', '1', 'on'):
	            return True
	        # The empty string stays falsy, matching the old bool('') result.
	        if lowered in ('false', 'f', 'no', 'n', '0', 'off', ''):
	            return False
	        raise argparse.ArgumentTypeError(
	            'Boolean value expected, got %r.' % (value,))

	    parser = argparse.ArgumentParser(
	        description='Head pose estimation using the 6DRepNet.')
	    parser.add_argument('--gpu',
	                        dest='gpu_id', help='GPU device id to use [0]',
	                        default=0, type=int)
	    parser.add_argument('--cam',
	                        dest='cam_id', help='Camera device id to use [0]',
	                        default=0, type=int)
	    parser.add_argument('--snapshot',
	                        dest='snapshot', help='Name of model snapshot.',
	                        default='', type=str)
	    # nargs='?' + const=True lets a bare ``--save_viz`` enable the option
	    # while ``--save_viz True`` / ``--save_viz False`` keep working.
	    parser.add_argument('--save_viz',
	                        dest='save_viz', help='Save images with pose cube.',
	                        nargs='?', const=True,
	                        default=False, type=_str2bool)

	    return parser.parse_args(argv)


	# Preprocessing pipeline applied to each face crop before it is fed to the
	# model: resize, centre-crop to 224, convert to a tensor, then normalise
	# each channel.
	# NOTE(review): the mean/std values look like the usual ImageNet channel
	# statistics -- confirm they match what the network was trained with.
	transformations = transforms.Compose(
	    [
	        transforms.Resize(224),
	        transforms.CenterCrop(224),
	        transforms.ToTensor(),
	        transforms.Normalize(
	            mean=[0.485, 0.456, 0.406],
	            std=[0.229, 0.224, 0.225],
	        ),
	    ]
	)

	if __name__ == '__main__':
	    # Webcam demo: detect faces in every captured frame, estimate the head
	    # pose of each confident detection with SixDRepNet and draw a pose cube
	    # on the frame. Press ESC in the "Demo" window to quit.
	    # NOTE: args.save_viz is parsed but not used anywhere in this script.
	    args = parse_args()
	    cudnn.enabled = True
	    gpu = args.gpu_id
	    cam = args.cam_id
	    snapshot_path = args.snapshot
	    model = SixDRepNet(backbone_name='RepVGG-A0',
	                       backbone_file='',
	                       deploy=True,
	                       pretrained=False)

	    print('Loading data.')

	    detector = RetinaFace(gpu_id=gpu)

	    # Load the snapshot onto the CPU first; the model is moved to the GPU
	    # afterwards when one was requested (gpu != -1).
	    saved_state_dict = torch.load(snapshot_path, map_location='cpu')

	    # The file may hold either a checkpoint dict wrapping the weights under
	    # 'model_state_dict' or the bare state dict itself.
	    if 'model_state_dict' in saved_state_dict:
	        model.load_state_dict(saved_state_dict['model_state_dict'])
	    else:
	        model.load_state_dict(saved_state_dict)
	    if gpu != -1:
	        model.cuda(gpu)

	    # Test the Model
	    model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).

	    cap = cv2.VideoCapture(cam)

	    # Check if the webcam is opened correctly
	    if not cap.isOpened():
	        raise IOError("Cannot open webcam")

	    try:
	        with torch.no_grad():
	            while True:
	                ret, frame = cap.read()
	                # A failed grab yields no frame; stop instead of handing
	                # None to the detector.
	                if not ret or frame is None:
	                    break

	                faces = detector(frame)

	                for box, _landmarks, score in faces:
	                    # Skip low-confidence detections.
	                    if score < .95:
	                        continue
	                    x_min = int(box[0])
	                    y_min = int(box[1])
	                    x_max = int(box[2])
	                    y_max = int(box[3])
	                    bbox_width = abs(x_max - x_min)
	                    bbox_height = abs(y_max - y_min)

	                    # Enlarge the box by 20% on every side before cropping.
	                    # NOTE(review): x is padded by the box *height* and y by
	                    # the box *width* -- kept as in the original; confirm
	                    # this is intentional.
	                    x_min = max(0, x_min-int(0.2*bbox_height))
	                    y_min = max(0, y_min-int(0.2*bbox_width))
	                    x_max = x_max+int(0.2*bbox_height)
	                    y_max = y_max+int(0.2*bbox_width)

	                    img = frame[y_min:y_max, x_min:x_max]
	                    # A degenerate box gives an empty crop that PIL cannot
	                    # convert; skip it rather than crash the whole demo.
	                    if img.size == 0:
	                        continue
	                    # NOTE(review): OpenCV frames are BGR while PIL treats
	                    # the array as RGB; no channel swap happens here --
	                    # confirm this matches the model's training input.
	                    img = Image.fromarray(img)
	                    img = img.convert('RGB')
	                    img = transformations(img)

	                    # Add a leading batch dimension of size 1.
	                    img = torch.Tensor(img[None, :])
	                    if gpu != -1:
	                        img = img.cuda(gpu)

	                    start = time.time()
	                    R_pred = model(img)
	                    end = time.time()
	                    print('Head pose estimation: %2f ms' % ((end - start)*1000.))

	                    # Convert the result from radians to degrees.
	                    euler = utils.compute_euler_angles_from_rotation_matrices(
	                        R_pred, use_gpu=False)*180/np.pi
	                    p_pred_deg = euler[:, 0].cpu()
	                    y_pred_deg = euler[:, 1].cpu()
	                    r_pred_deg = euler[:, 2].cpu()

	                    # Draw the cube centred on the (enlarged) face box.
	                    utils.plot_pose_cube(
	                        frame, y_pred_deg, p_pred_deg, r_pred_deg,
	                        x_min + int(.5*(x_max-x_min)),
	                        y_min + int(.5*(y_max-y_min)),
	                        size=bbox_width)

	                cv2.imshow("Demo", np.array(frame, dtype=np.uint8))
	                # Poll the keyboard once per frame. Previously the ESC check
	                # lived inside the per-face loop, so its `break` never left
	                # the outer `while True`.
	                if cv2.waitKey(5) & 0xFF == 27:
	                    break
	    finally:
	        # Always free the camera and close the preview window.
	        cap.release()
	        cv2.destroyAllWindows()