Spaces:

wwen1997
/

Framer

Running on Zero

App Files Files Community

Framer / models_diffusers /sift_match.py

wwen1997

Upload 13 files

7615afe verified about 1 year ago

raw

history blame contribute delete

8.31 kB

	from scipy.interpolate import interp1d, PchipInterpolator

	import numpy as np
	from PIL import Image
	import cv2
	import torch


	def sift_match(
	    img1, img2,
	    thr=0.5,
	    topk=5, method="max_dist",
	    output_path="sift_matches.png",
	):
	    """Match SIFT keypoints between two images and return the selected pairs.

	    Keypoints are detected with OpenCV SIFT and matched with a brute-force
	    2-nearest-neighbour search; at most ``topk`` matches are then selected
	    according to ``method``. A visualisation (``img1`` stacked above
	    ``img2`` with a 10-pixel white gap, selected matches drawn as coloured
	    lines with circles at both ends) is always written to ``output_path``.

	    Args:
	        img1: First image. Expected to be a PIL image; it is converted with
	            ``np.array`` and must yield an RGB ``uint8`` array.
	        img2: Second image, same requirements as ``img1``.
	        thr: Ratio-test threshold, used by ``"max_dist"`` and ``"random"``
	            only. A match is kept when its best descriptor distance is
	            below ``thr`` times the second-best one, so a smaller value
	            keeps fewer points.
	        topk: Maximum number of matches to return.
	        method: Selection strategy.
	            ``"max_dist"``: ratio test, then the ``topk`` matches whose two
	            endpoints are farthest apart in pixel coordinates.
	            ``"random"``: ratio test, then ``topk`` matches drawn at random
	            without replacement.
	            ``"max_score"``: the ``topk`` matches with the smallest
	            best/second-best distance ratio.
	            ``"max_score_even"``: like ``"max_score"``, but a candidate
	            among those ``topk`` is skipped when its ``img1`` keypoint lies
	            within 50 pixels of an already accepted one, so fewer than
	            ``topk`` matches may be returned.
	        output_path: Where the visualisation image is saved.

	    Returns:
	        A tensor of shape ``(2, k, 2)`` where ``[0]`` holds the keypoint
	        locations in ``img1`` and ``[1]`` the matched locations in ``img2``,
	        or ``None`` when no match was selected.
	    """
	    assert method in ["max_dist", "random", "max_score", "max_score_even"]

	    # 1. Keep RGB copies for drawing and build grayscale inputs for SIFT.
	    img1_rgb = np.array(img1).copy()
	    img2_rgb = np.array(img2).copy()
	    img1_gray = cv2.cvtColor(img1_rgb, cv2.COLOR_RGB2GRAY)
	    img2_gray = cv2.cvtColor(img2_rgb, cv2.COLOR_RGB2GRAY)

	    # 2. Detect keypoints / descriptors and find the two nearest neighbours
	    #    in image 2 for every descriptor of image 1.
	    sift = cv2.SIFT_create()
	    kp1, des1 = sift.detectAndCompute(img1_gray, None)
	    kp2, des2 = sift.detectAndCompute(img2_gray, None)

	    if des1 is None or des2 is None:
	        # An image without keypoints yields no descriptors; knnMatch would
	        # raise on None, so treat this as "no matches".
	        matches = []
	    else:
	        # knnMatch may return fewer than two neighbours per query (e.g. when
	        # image 2 has a single descriptor); the ratio is undefined for those.
	        matches = [
	            pair
	            for pair in cv2.BFMatcher().knnMatch(des1, des2, k=2)
	            if len(pair) == 2
	        ]

	    # 3. Select the matches to keep (DMatch objects, in output order).
	    if method in ("max_score", "max_score_even"):
	        matches = sorted(matches, key=lambda pair: _lowe_ratio(pair[0], pair[1]))

	        min_anchor_gap = 50  # pixels, only enforced by "max_score_even"
	        anchors = []
	        selected = []
	        for best, second in matches[:topk]:
	            print(_lowe_ratio(best, second))

	            pt1 = kp1[best.queryIdx].pt
	            if method == "max_score_even" and any(
	                np.linalg.norm(np.array(pt1) - np.array(anchor)) < min_anchor_gap
	                for anchor in anchors
	            ):
	                continue

	            anchors.append(pt1)
	            selected.append(best)
	    else:
	        # Ratio test.
	        candidates = [
	            best for best, second in matches
	            if best.distance < thr * second.distance
	        ]
	        displacements = np.array([
	            np.linalg.norm(
	                np.array(kp1[m.queryIdx].pt) - np.array(kp2[m.trainIdx].pt)
	            )
	            for m in candidates
	        ])
	        order = np.argsort(displacements)
	        if method == "max_dist":
	            # Keep the topk largest displacements. An explicit start index is
	            # used because order[-0:] would keep everything when topk == 0.
	            order = order[max(len(order) - topk, 0):]
	        else:  # "random"
	            topk = min(topk, len(order))
	            order = np.random.choice(order, topk, replace=False)
	        selected = [candidates[i] for i in order]

	    # Each entry is a (2, 2) tensor: [point in img1, point in img2].
	    point_list = [
	        torch.stack([
	            torch.tensor(kp1[m.queryIdx].pt),
	            torch.tensor(kp2[m.trainIdx].pt),
	        ])
	        for m in selected
	    ]

	    # 4. Draw the matches: images stacked vertically on a white canvas.
	    margin = 10
	    h1, w1 = img1_rgb.shape[:2]
	    h2, w2 = img2_rgb.shape[:2]
	    canvas = np.full((h1 + h2 + margin, max(w1, w2), 3), 255, dtype=np.uint8)
	    canvas[:h1, :w1] = img1_rgb
	    canvas[h1 + margin:, :w2] = img2_rgb

	    color_list = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255)]
	    for color_idx, m in enumerate(selected):
	        x1, y1 = kp1[m.queryIdx].pt
	        x2, y2 = kp2[m.trainIdx].pt
	        start = (int(x1), int(y1))
	        # The second image sits below the first one plus the margin.
	        end = (int(x2), int(y2) + h1 + margin)
	        color = color_list[color_idx % len(color_list)]
	        # Anti-aliased drawing avoids zigzag artifacts in the lines.
	        cv2.line(canvas, start, end, color, 1, lineType=cv2.LINE_AA)
	        cv2.circle(canvas, start, 3, color, lineType=cv2.LINE_AA)
	        cv2.circle(canvas, end, 3, color, lineType=cv2.LINE_AA)

	    Image.fromarray(canvas).save(output_path)
	    print(f"Save the sift matches to {output_path}")

	    if len(point_list) == 0:
	        return None

	    # (k, 2, 2) -> (f, k, 2) with f=2 (before interpolation)
	    return torch.stack(point_list).permute(1, 0, 2)


	def _lowe_ratio(best, second):
	    """Return the best/second-best distance ratio, or inf when it is undefined."""
	    if second.distance > 0:
	        return best.distance / second.distance
	    return float("inf")


	def interpolate_trajectory(points_torch, num_frames, t=None):
	    """Interpolate keyframe points into one position per output frame.

	    The x and y coordinates of every point are interpolated independently
	    over time with ``PchipInterpolator`` and sampled at ``num_frames``
	    evenly spaced times in ``[0, 1]``.

	    Args:
	        points_torch: Tensor of shape ``(f, topk, 2)``: ``f`` keyframes
	            (``f=2`` before interpolation), ``topk`` points, two coordinates.
	        num_frames: Number of frames in the returned trajectory.
	        t: Optional keyframe times, one per keyframe. Defaults to
	            ``np.linspace(0, 1, f)``. Output samples are always taken on
	            ``[0, 1]`` regardless of ``t``.
	            NOTE(review): custom times presumably should span ``[0, 1]`` -
	            confirm with callers.

	    Returns:
	        Tensor of shape ``(num_frames, topk, 2)``.
	    """
	    num_keyframes, num_points = points_torch.shape[0], points_torch.shape[1]

	    if num_points == 0:
	        # Nothing to interpolate. Building the result from an empty list would
	        # give a 1-D tensor that cannot be permuted, so return the empty
	        # trajectory directly.
	        return torch.empty((num_frames, 0, 2))

	    # (topk, f, 2); detach so tensors that require grad can be converted.
	    tracks = points_torch.permute(1, 0, 2).detach().cpu().numpy()

	    # Both time axes are the same for every point, so compute them once.
	    if t is None:
	        t = np.linspace(0, 1, num_keyframes)
	    new_t = np.linspace(0, 1, num_frames)

	    points_list = []
	    for track in tracks:
	        # track: (f, 2)
	        fx = PchipInterpolator(t, track[:, 0])
	        fy = PchipInterpolator(t, track[:, 1])
	        points_list.append(list(zip(fx(new_t), fy(new_t))))

	    points = torch.tensor(points_list)  # (topk, num_frames, 2)
	    return points.permute(1, 0, 2)  # (num_frames, topk, 2)


	# diffusion feature matching
	def point_tracking(
	    F0,
	    F1,
	    handle_points,
	    handle_points_init,
	    track_dist=5,
	):
	    """Move every handle point to the best-matching location in ``F1``.

	    For each point, the feature vector of ``F0`` at the point's initial
	    location is compared (sum of absolute differences over dim 1) with the
	    features of ``F1`` inside a square window of half-size ``track_dist``
	    around the point's current location. The window is clipped to the
	    feature-map bounds and the point is moved to the closest cell.

	    Args:
	        F0: 4-D feature map indexed as ``F0[:, :, row, col]``.
	            NOTE(review): presumably the leading dim is 1 (the distance map
	            is squeezed on dim 0 before the argmin) - confirm.
	        F1: Feature map indexed the same way as ``F0``.
	        handle_points: ``(num_points, 2)`` current locations. Column 0 is
	            used as the column index and column 1 as the row index of the
	            feature maps.
	        handle_points_init: ``(num_points, 2)`` initial locations, same
	            column convention as ``handle_points``.
	        track_dist: Half-size of the search window, in feature-map cells.

	    Returns:
	        ``(num_points, 2)`` tensor of updated locations, in the same column
	        order as the input. The input tensors are not modified.
	    """

	    def _swap_columns(points):
	        # Exchange the first two columns; stacking builds a new tensor, so
	        # later writes never touch the caller's data.
	        return torch.stack((points[:, 1], points[:, 0]), dim=-1)

	    # Work in (row, col) order while indexing the feature maps.
	    current_rc = _swap_columns(handle_points)
	    initial_rc = _swap_columns(handle_points_init)

	    with torch.no_grad():
	        _batch, _channels, n_rows, n_cols = F0.shape

	        for i in range(len(current_rc)):
	            start = initial_rc[i]
	            reference = F0[:, :, int(start[0]), int(start[1])]

	            # Search window around the current location, clipped to the map.
	            center_row = int(current_rc[i][0])
	            center_col = int(current_rc[i][1])
	            top = max(0, center_row - track_dist)
	            bottom = min(n_rows, center_row + track_dist + 1)
	            left = max(0, center_col - track_dist)
	            right = min(n_cols, center_col + track_dist + 1)
	            window = F1[:, :, top:bottom, left:right]

	            # L1 distance between the reference feature and every window cell.
	            l1 = (reference[..., None, None] - window).abs().sum(dim=1)
	            l1 = l1.squeeze(dim=0)

	            # Flat argmin -> (row, col) offset inside the window.
	            d_row, d_col = divmod(l1.argmin().item(), l1.shape[-1])
	            current_rc[i, 0] = top + d_row
	            current_rc[i, 1] = left + d_col

	    # Back to the caller's column order, shape (num_points, 2).
	    return _swap_columns(current_rc)