falseu committed
Commit · 4f6c34a · Parent(s): 0dfef01

update comments

- AdaIN.py +20 -1
- test.py +11 -3
- test_interpolate.py +6 -2
- test_video.py +6 -1
- train.py +11 -4
AdaIN.py
CHANGED
@@ -13,7 +13,11 @@ class AdaINNet(nn.Module):
     def __init__(self, vgg_weight):
         super().__init__()
         self.encoder = vgg19(vgg_weight)
-        self.encoder = nn.Sequential(*list(self.encoder.children())[:22])
+
+        # drop layers after 4_1
+        self.encoder = nn.Sequential(*list(self.encoder.children())[:22])
+
+        # No optimization for encoder
         for parameter in self.encoder.parameters():
             parameter.requires_grad = False
 
@@ -21,15 +25,29 @@ class AdaINNet(nn.Module):
 
         self.mseloss = nn.MSELoss()
 
+    """
+    Computes style loss of two images
+
+    Args:
+        x (torch.FloatTensor): content image tensor
+        y (torch.FloatTensor): style image tensor
+
+    Return:
+        Mean Squared Error between x.mean, y.mean and MSE between x.std, y.std
+    """
     def _style_loss(self, x, y):
         return self.mseloss(torch.mean(x, dim=[2, 3]), torch.mean(y, dim=[2, 3])) + \
             self.mseloss(torch.std(x, dim=[2, 3]), torch.std(y, dim=[2, 3]))
 
     def forward(self, content, style, alpha=1.0):
+        # Generate image features
         content_enc = self.encoder(content)
         style_enc = self.encoder(style)
+
+        # Perform style transfer on feature space
         transfer_enc = adaptive_instance_normalization(content_enc, style_enc)
 
+        # Generate output image
         out = self.decoder(transfer_enc)
 
         # vgg19 layer relu1_1
@@ -47,6 +65,7 @@ class AdaINNet(nn.Module):
         # vgg19 layer relu4_1
         out_enc = self.encoder[13:](out_relu31)
 
+        # Calculate loss
         content_loss = self.mseloss(out_enc, transfer_enc)
         style_loss = self._style_loss(out_relu11, style_relu11) + self._style_loss(out_relu21, style_relu21) + \
             self._style_loss(out_relu31, style_relu31) + self._style_loss(out_enc, style_enc)
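
The forward() pass above calls adaptive_instance_normalization, which is defined elsewhere in the repo and not shown in this diff. For reference, a minimal sketch of the standard AdaIN operation (Huang & Belongie, 2017) that the call is expected to implement; the eps term is an assumption added for numerical stability:

import torch

def adaptive_instance_normalization(content_enc, style_enc, eps=1e-5):
    # Per-channel statistics over the spatial dims of (N, C, H, W) feature maps
    c_mean = content_enc.mean(dim=[2, 3], keepdim=True)
    c_std = content_enc.std(dim=[2, 3], keepdim=True) + eps  # avoid divide-by-zero
    s_mean = style_enc.mean(dim=[2, 3], keepdim=True)
    s_std = style_enc.std(dim=[2, 3], keepdim=True)
    # Normalize the content features, then re-scale and shift with style statistics
    return s_std * (content_enc - c_mean) / c_std + s_mean

This is also what the style loss above measures: the decoder output is pushed to match the style image's per-channel feature mean and std at relu1_1 through relu4_1.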
test.py
CHANGED
@@ -69,6 +69,7 @@ def main():
     assert len(content_pths) > 0, 'Failed to load content image'
     assert len(style_pths) > 0, 'Failed to load style image'
 
+    # Prepare directory for saving results
     out_dir = './results/'
     os.makedirs(out_dir, exist_ok=True)
 
@@ -81,8 +82,9 @@ def main():
     # Prepare image transform
     t = transform(512)
 
-    # Prepare grid image
+    # Prepare grid image, add style images to the first row
     if args.grid_pth:
+        # Add empty image
         imgs = [np.ones((1, 1, 3), np.uint8) * 255]
         for style_pth in style_pths:
             imgs.append(Image.open(style_pth))
@@ -101,15 +103,20 @@ def main():
 
         style_tensor = t(Image.open(style_pth)).unsqueeze(0).to(device)
 
-        tic = time.perf_counter()
+        # Start time
+        tic = time.perf_counter()
+
+        # Execute style transfer
         with torch.no_grad():
             out_tensor = style_transfer(content_tensor, style_tensor, model.encoder, model.decoder, args.alpha).cpu()
 
-        toc = time.perf_counter()
+        # End time
+        toc = time.perf_counter()
         print("Content: " + content_pth.stem + ". Style: " \
             + style_pth.stem + '. Alpha: ' + str(args.alpha) + '. Style Transfer time: %.4f seconds' % (toc-tic))
         times.append(toc-tic)
 
+        # Save image
         out_pth = out_dir + content_pth.stem + '_style_' + style_pth.stem + '_alpha' + str(args.alpha) + content_pth.suffix
         save_image(out_tensor, out_pth)
 
@@ -122,6 +129,7 @@ def main():
     avg = sum(times)/len(times)
     print("Average style transfer time: %.4f seconds" % (avg))
 
+    # Generate grid image
     if args.grid_pth:
         print("Generating grid image")
         grid_image(len(content_pths) + 1, len(style_pths) + 1, imgs, save_pth=args.grid_pth)
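
style_transfer is imported from the repo's utility code and its body is not part of this diff. A minimal sketch, assuming the usual AdaIN inference path in which alpha blends the stylized features with the original content features (alpha=1.0 gives full stylization, 0.0 reconstructs the content image); the function and argument names follow the call site above, and adaptive_instance_normalization is the helper used in AdaIN.py:

def style_transfer(content, style, encoder, decoder, alpha=1.0):
    # Encode both images into relu4_1 feature space
    content_enc = encoder(content)
    style_enc = encoder(style)
    # Transfer style statistics onto the content features, then blend by alpha
    transfer_enc = adaptive_instance_normalization(content_enc, style_enc)
    blended = alpha * transfer_enc + (1 - alpha) * content_enc
    return decoder(blended)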
test_interpolate.py
CHANGED
@@ -102,24 +102,28 @@ def main():
     for content_pth in content_pths:
         content_tensor = t(Image.open(content_pth)).unsqueeze(0).to(device)
 
+        # Prepare multiple style images
         style_tensor = []
         for style_pth in style_pths:
             img = Image.open(style_pth)
-            style_tensor.append(transform([512, 512])(img))
+            style_tensor.append(transform([512, 512])(img))
         style_tensor = torch.stack(style_tensor, dim=0).to(device)
 
-        for inter_weight in inter_weights:
+        for inter_weight in inter_weights:
+            # Execute Interpolate style transfer
             with torch.no_grad():
                 out_tensor = interpolate_style_transfer(content_tensor, style_tensor, model.encoder, model.decoder, args.alpha, inter_weight).cpu()
 
             print("Content: " + content_pth.stem + ". Style: " + str([style_pth.stem for style_pth in style_pths]) + ". Interpolation weight: ", str(inter_weight))
 
+            # Save results
             out_pth = out_dir + content_pth.stem + '_interpolate_' + str(inter_weight) + content_pth.suffix
             save_image(out_tensor, out_pth)
 
             if args.grid_pth:
                 imgs.append(Image.open(out_pth))
 
+    # Generate grid image
     if args.grid_pth:
         print("Generating grid image")
         grid_image(5, 5, imgs, save_pth=args.grid_pth)
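
interpolate_style_transfer is likewise defined outside this diff. A sketch under the assumption that it implements the style interpolation from the AdaIN paper, decoding a weighted combination of per-style AdaIN features; the signature and the semantics of inter_weight (here a per-style weight list) are assumptions based on the call site:

import torch

def interpolate_style_transfer(content, styles, encoder, decoder, alpha=1.0, weights=None):
    # content: (1, C, H, W); styles: (S, C, H, W) stack of style images
    if weights is None:
        weights = [1.0 / len(styles)] * len(styles)  # equal weighting by default
    content_enc = encoder(content)
    mixed_enc = torch.zeros_like(content_enc)
    for style, w in zip(styles, weights):
        style_enc = encoder(style.unsqueeze(0))
        mixed_enc = mixed_enc + w * adaptive_instance_normalization(content_enc, style_enc)
    # Blend with the content features by alpha, as in single-style transfer
    blended = alpha * mixed_enc + (1 - alpha) * content_enc
    return decoder(blended)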
test_video.py
CHANGED
@@ -55,13 +55,16 @@ def main():
     style_image_pth = Path(args.style_image)
     style_image = Image.open(style_image_pth)
 
+    # Read video info
     fps = int(content_video.get(cv2.CAP_PROP_FPS))
     frame_count = int(content_video.get(cv2.CAP_PROP_FRAME_COUNT))
     video_height = int(content_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
     video_width = int(content_video.get(cv2.CAP_PROP_FRAME_WIDTH))
 
+    # Prepare loop
     video_tqdm = tqdm(total=frame_count)
 
+    # Prepare output video writer
     out_dir = './results_video/'
     os.makedirs(out_dir, exist_ok=True)
     out_pth = Path(out_dir + content_video_pth.stem + '_style_' \
@@ -81,7 +84,8 @@ def main():
 
     while content_video.isOpened():
         ret, content_image = content_video.read()
-        if not ret:
+        # Failed to read a frame
+        if not ret:
             break
 
         content_tensor = t(Image.fromarray(content_image)).unsqueeze(0).to(device)
@@ -96,6 +100,7 @@ def main():
         out_tensor = cv2.normalize(src=out_tensor, dst=None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
         out_tensor = cv2.resize(out_tensor, (video_width, video_height), interpolation=cv2.INTER_CUBIC)
 
+        # Write output frame to video
         writer.append_data(np.array(out_tensor))
         video_tqdm.update(1)
 
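
The writer receiving append_data is created outside the shown hunks; append_data is the imageio writer API, so the setup presumably resembles the sketch below, reusing the fps and out_pth computed above (the exact call is an assumption):

import imageio

# Assumed setup: an ffmpeg-backed writer matching the source video's frame rate;
# writer.close() would follow the frame loop.
writer = imageio.get_writer(str(out_pth), fps=fps)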
train.py
CHANGED
@@ -17,21 +17,24 @@ def main():
     args = parser.parse_args()
 
     device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu')
-
+
     check_point_dir = './check_point/'
     weights_dir = './weights/'
+
+    # Prepare Training dataset
     train_set = TrainSet(args.content_dir, args.style_dir)
     train_loader = DataLoader(dataset=train_set, batch_size=args.batch_size, shuffle=True)
-
+
+    # load vgg19 weights
     vgg_model = torch.load('vgg_normalized.pth')
     model = AdaINNet(vgg_model).to(device)
-
     decoder_optimizer = torch.optim.Adam(model.decoder.parameters(), lr=1e-6)
+
     total_loss, content_loss, style_loss = 0.0, 0.0, 0.0
     losses = []
     iteration = 0
 
-    # If resume
+    # If resume training, load states
     if args.resume > 0:
         states = torch.load(check_point_dir + "epoch_" + str(args.resume)+'.pth')
         model.decoder.load_state_dict(states['decoder'])
@@ -54,10 +57,14 @@ def main():
         content_batch = content_batch.to(device)
         style_batch = style_batch.to(device)
 
+        # Feed forward and compute loss
         loss_content, loss_style = model(content_batch, style_batch)
         loss_scaled = loss_content + 10 * loss_style
+
+        # Gradient descent
         loss_scaled.backward()
         decoder_optimizer.step()
+
         total_loss = loss_scaled.item()
         content_loss = loss_content.item()
         style_loss = loss_style.item()
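
The resume branch above loads states['decoder'] from check_point/epoch_<N>.pth; the matching save call sits outside the shown hunks. A minimal sketch of what it presumably looks like at the end of an epoch; the 'optimizer' key and the epoch variable are hypothetical:

import torch

# Hypothetical checkpoint save mirroring the resume logic above
torch.save({
    'decoder': model.decoder.state_dict(),
    'optimizer': decoder_optimizer.state_dict(),  # assumed key
}, check_point_dir + 'epoch_' + str(epoch) + '.pth')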