@@ -83,52 +83,106 @@ def forward(self, x):
        return x_tilde, z_e_x, z_q_x


-class MaskedConv2d(nn.Conv2d):
-    def __init__(self, mask_type, *args, **kwargs):
-        super(MaskedConv2d, self).__init__(*args, **kwargs)
-        assert mask_type in {'A', 'B'}
-        self.register_buffer('mask', self.weight.data.clone())
-        _, _, kH, kW = self.weight.size()
-        self.mask.fill_(1)
-        self.mask[:, :, kH // 2, kW // 2 + (mask_type == 'B'):] = 0
-        self.mask[:, :, kH // 2 + 1:] = 0
+class GatedActivation(nn.Module):
+    def __init__(self):
+        super().__init__()

    def forward(self, x):
-        self.weight.data *= self.mask
-        return super(MaskedConv2d, self).forward(x)
+        x, y = x.chunk(2, dim=1)
+        return F.tanh(x) * F.sigmoid(y)
+
+
+class GatedMaskedConv2d(nn.Module):
+    def __init__(self, mask_type, dim, kernel, residual=True):
+        super().__init__()
+        assert kernel % 2 == 1, "Kernel size must be odd"
+        self.mask_type = mask_type
+        self.residual = residual
+
+        kernel_shp = (kernel // 2 + 1, kernel)  # (ceil(n/2), n)
+        padding_shp = (kernel // 2, kernel // 2)
+        self.vert_stack = nn.Conv2d(
+            dim, dim * 2,
+            kernel_shp, 1, padding_shp
+        )
+
+        self.vert_to_horiz = nn.Conv2d(2 * dim, 2 * dim, 1)
+
+        kernel_shp = (1, kernel // 2 + 1)
+        padding_shp = (0, kernel // 2)
+        self.horiz_stack = nn.Conv2d(
+            dim, dim * 2,
+            kernel_shp, 1, padding_shp
+        )
+
+        self.horiz_resid = nn.Conv2d(dim, dim, 1)
+
+        self.gate = GatedActivation()
+
+    def make_causal(self):
+        self.vert_stack.weight.data[:, :, -1].zero_()  # Mask final row
+        self.horiz_stack.weight.data[:, :, :, -1].zero_()  # Mask final column

+    def forward(self, x_v, x_h):
+        if self.mask_type == 'A':
+            self.make_causal()

-class PixelCNN(nn.Module):
-    def __init__(self, dim=64, n_layers=4):
+        h_vert = self.vert_stack(x_v)
+        h_vert = h_vert[:, :, :x_v.size(-1), :]
+        out_v = self.gate(h_vert)
+
+        h_horiz = self.horiz_stack(x_h)
+        h_horiz = h_horiz[:, :, :, :x_h.size(-2)]
+        v2h = self.vert_to_horiz(h_vert)
+
+        out = self.gate(v2h + h_horiz)
+        if self.residual:
+            out_h = self.horiz_resid(out) + x_h
+        else:
+            out_h = self.horiz_resid(out)
+
+        return out_v, out_h
+
+
+class GatedPixelCNN(nn.Module):
+    def __init__(self, input_dim=256, dim=64, n_layers=7):
        super().__init__()
        self.dim = 64

        # Create embedding layer to embed input
-        self.embedding = nn.Embedding(256, dim)
+        self.embedding = nn.Embedding(input_dim, dim)

        # Building the PixelCNN layer by layer
-        net = []
+        self.layers = nn.ModuleList()

        # Initial block with Mask-A convolution
        # Rest with Mask-B convolutions
        for i in range(n_layers):
            mask_type = 'A' if i == 0 else 'B'
-            net.extend([
-                MaskedConv2d(mask_type, dim, dim, 7, 1, 3, bias=False),
-                nn.BatchNorm2d(dim),
-                nn.ReLU(True)
-            ])
+            kernel = 7 if i == 0 else 3
+            residual = False if i == 0 else True

-        # Add the output layer
-        net.append(nn.Conv2d(dim, 256, 1))
+            self.layers.append(
+                GatedMaskedConv2d(mask_type, dim, kernel, residual)
+            )

-        self.net = nn.Sequential(*net)
+        # Add the output layer
+        self.output_conv = nn.Sequential(
+            nn.Conv2d(dim, dim, 1),
+            nn.ReLU(True),
+            nn.Conv2d(dim, input_dim, 1)
+        )

    def forward(self, x):
        shp = x.size() + (-1, )
        x = self.embedding(x.view(-1)).view(shp)  # (B, H, W, C)
        x = x.permute(0, 3, 1, 2)  # (B, C, H, W)
-        return self.net(x)
+
+        x_v, x_h = (x, x)
+        for i, layer in enumerate(self.layers):
+            x_v, x_h = layer(x_v, x_h)
+
+        return self.output_conv(x_h)

    def generate(self, batch_size=64):
        x = Variable(
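For reference, a minimal usage sketch of the classes introduced by this diff; it is illustrative only and not part of the commit. It assumes the file above is importable (the module name `pixelcnn` is a placeholder) and that the model's input is a `(B, H, W)` tensor of integers in `[0, input_dim)`, e.g. 8-bit pixel values or VQ-VAE codebook indices.

```python
# Illustrative sketch, not part of the diff above: a forward pass of the new
# GatedPixelCNN on dummy discrete inputs.
import torch

from pixelcnn import GatedPixelCNN  # hypothetical import path; adjust to the actual file

model = GatedPixelCNN(input_dim=256, dim=64, n_layers=7)

# (B, H, W) grid of integers in [0, 256): 8-bit pixels or VQ-VAE code indices.
x = torch.randint(0, 256, (4, 28, 28))

logits = model(x)  # (B, 256, 28, 28): unnormalized per-position class scores
print(logits.shape)
```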