import torch
import torch.nn as nn
import torch.nn.functional as F

from functools import reduce


class BaseNetwork(nn.Module):
    def __init__(self):
        super(BaseNetwork, self).__init__()

    def print_network(self):
        if isinstance(self, list):
            self = self[0]
        num_params = 0
        for param in self.parameters():
            num_params += param.numel()
        print(
            'Network [%s] was created. Total number of parameters: %.1f million. '
            'To see the architecture, do print(network).' %
            (type(self).__name__, num_params / 1000000))

    def init_weights(self, init_type='normal', gain=0.02):
        '''
        Initialize the network's weights.
        init_type: normal | xavier | xavier_uniform | kaiming | orthogonal | none
        https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/9451e70673400885567d08a9e97ade2524c700d0/models/networks.py#L39
        '''
        def init_func(m):
            classname = m.__class__.__name__
            if classname.find('InstanceNorm2d') != -1:
                # Affine InstanceNorm: unit scale, zero shift.
                if hasattr(m, 'weight') and m.weight is not None:
                    nn.init.constant_(m.weight.data, 1.0)
                if hasattr(m, 'bias') and m.bias is not None:
                    nn.init.constant_(m.bias.data, 0.0)
            elif hasattr(m, 'weight') and (classname.find('Conv') != -1
                                           or classname.find('Linear') != -1):
                if init_type == 'normal':
                    nn.init.normal_(m.weight.data, 0.0, gain)
                elif init_type == 'xavier':
                    nn.init.xavier_normal_(m.weight.data, gain=gain)
                elif init_type == 'xavier_uniform':
                    nn.init.xavier_uniform_(m.weight.data, gain=1.0)
                elif init_type == 'kaiming':
                    nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
                elif init_type == 'orthogonal':
                    nn.init.orthogonal_(m.weight.data, gain=gain)
                elif init_type == 'none':
                    # Fall back to the layer's default initializer.
                    m.reset_parameters()
                else:
                    raise NotImplementedError(
                        'initialization method [%s] is not implemented' %
                        init_type)
                if hasattr(m, 'bias') and m.bias is not None:
                    nn.init.constant_(m.bias.data, 0.0)

        self.apply(init_func)

        # Propagate to any child module that defines its own initializer.
        for m in self.children():
            if hasattr(m, 'init_weights'):
                m.init_weights(init_type, gain)
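

# A minimal usage sketch for BaseNetwork; `ToyNet` is a hypothetical
# subclass used only for illustration.
def _base_network_example():
    class ToyNet(BaseNetwork):
        def __init__(self):
            super(ToyNet, self).__init__()
            self.conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)

    net = ToyNet()
    net.init_weights(init_type='kaiming')  # re-draws conv weights, zeroes biases
    net.print_network()  # reports the parameter count in millions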


class Vec2Feat(nn.Module):
    """Folds a sequence of token vectors back into spatial feature maps."""

    def __init__(self, channel, hidden, kernel_size, stride, padding):
        super(Vec2Feat, self).__init__()
        self.relu = nn.LeakyReLU(0.2, inplace=True)
        # Each token is projected to a full (channel x kh x kw) patch.
        c_out = reduce((lambda x, y: x * y), kernel_size) * channel
        self.embedding = nn.Linear(hidden, c_out)
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.bias_conv = nn.Conv2d(channel,
                                   channel,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)

    def forward(self, x, t, output_size):
        b_, _, _, _, c_ = x.shape
        x = x.view(b_, -1, c_)
        feat = self.embedding(x)
        b, _, c = feat.size()
        # (b*t, n_vecs, c) -> (b*t, c, n_vecs), the layout F.fold expects.
        feat = feat.view(b * t, -1, c).permute(0, 2, 1)
        feat = F.fold(feat,
                      output_size=output_size,
                      kernel_size=self.kernel_size,
                      stride=self.stride,
                      padding=self.padding)
        feat = self.bias_conv(feat)
        return feat
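

# Shape walk-through for Vec2Feat with toy sizes chosen for illustration:
# a 3x3 grid of tokens per frame is folded into a 9x9 feature map.
def _vec2feat_example():
    m = Vec2Feat(channel=16, hidden=64, kernel_size=(3, 3),
                 stride=(3, 3), padding=(0, 0))
    x = torch.randn(1, 2, 3, 3, 64)  # (batch, t, grid_h, grid_w, hidden)
    feat = m(x, t=2, output_size=(9, 9))
    print(feat.shape)  # torch.Size([2, 16, 9, 9])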


class FusionFeedForward(nn.Module):
    """Token-wise feed-forward that fuses overlapping patches via fold/unfold."""

    def __init__(self, dim, hidden_dim=1960, t2t_params=None):
        super(FusionFeedForward, self).__init__()
        self.fc1 = nn.Sequential(nn.Linear(dim, hidden_dim))
        self.fc2 = nn.Sequential(nn.GELU(), nn.Linear(hidden_dim, dim))
        assert t2t_params is not None
        self.t2t_params = t2t_params
        self.kernel_shape = reduce((lambda x, y: x * y),
                                   t2t_params['kernel_size'])

    def forward(self, x, output_size):
        # Number of sliding-window positions (tokens per frame) implied by
        # the requested output size.
        n_vecs = 1
        for i, d in enumerate(self.t2t_params['kernel_size']):
            n_vecs *= int((output_size[i] + 2 * self.t2t_params['padding'][i] -
                           (d - 1) - 1) / self.t2t_params['stride'][i] + 1)

        x = self.fc1(x)
        b, n, c = x.size()
        # Fold a tensor of ones to count, per pixel, how many overlapping
        # patches cover it; dividing by this normalizer averages the overlaps.
        normalizer = x.new_ones(b, n, self.kernel_shape).view(
            -1, n_vecs, self.kernel_shape).permute(0, 2, 1)
        normalizer = F.fold(normalizer,
                            output_size=output_size,
                            kernel_size=self.t2t_params['kernel_size'],
                            padding=self.t2t_params['padding'],
                            stride=self.t2t_params['stride'])

        x = F.fold(x.view(-1, n_vecs, c).permute(0, 2, 1),
                   output_size=output_size,
                   kernel_size=self.t2t_params['kernel_size'],
                   padding=self.t2t_params['padding'],
                   stride=self.t2t_params['stride'])

        # Unfold back to token vectors after the overlap-averaging.
        x = F.unfold(x / normalizer,
                     kernel_size=self.t2t_params['kernel_size'],
                     padding=self.t2t_params['padding'],
                     stride=self.t2t_params['stride'])
        x = x.permute(0, 2, 1).contiguous().view(b, n, c)
        x = self.fc2(x)
        return x
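

# Sketch of a FusionFeedForward call with the same toy soft-split parameters
# as the Vec2Feat example above. Note that hidden_dim must be divisible by
# the kernel area (here 3*3 = 9) for F.fold to accept the reshaped tensor;
# the default hidden_dim=1960 is likewise divisible by 7*7 = 49.
def _fusion_feed_forward_example():
    params = {'kernel_size': (3, 3), 'stride': (3, 3), 'padding': (0, 0)}
    ffn = FusionFeedForward(dim=64, hidden_dim=270, t2t_params=params)
    x = torch.randn(1, 2 * 9, 64)  # (batch, t * n_vecs, dim)
    out = ffn(x, output_size=(9, 9))
    print(out.shape)  # torch.Size([1, 18, 64])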