Commit ca3d943

minor bug fixes
1 parent f17c0a2 commit ca3d943

File tree

4 files changed: +51 −64 lines changed

modules.py

Lines changed: 39 additions & 35 deletions
@@ -19,40 +19,6 @@ def weights_init(m):
         m.bias.data.fill_(0)
 
 
-class ResBlock(nn.Module):
-    def __init__(self, dim):
-        super().__init__()
-        self.block = nn.Sequential(
-            nn.ReLU(True),
-            nn.Conv2d(dim, dim, 3, 1, 1),
-            nn.ReLU(True),
-            nn.Conv2d(dim, dim, 1),
-        )
-
-    def forward(self, x):
-        return x + self.block(x)
-
-
-class VQEmbedding(nn.Module):
-    def __init__(self, K, D):
-        super().__init__()
-        self.embedding = nn.Embedding(K, D)
-        self.embedding.weight.data.uniform_(-1./K, 1./K)
-
-    def forward(self, z_e_x):
-        # z_e_x - (B, D, H, W)
-        # emb - (K, D)
-
-        emb = self.embedding.weight
-        dists = torch.pow(
-            z_e_x.unsqueeze(1) - emb[None, :, :, None, None],
-            2
-        ).sum(2)
-
-        latents = dists.min(1)[1]
-        return latents
-
-
 class VAE(nn.Module):
     def __init__(self, input_dim, dim, z_dim):
         super().__init__()
@@ -90,11 +56,48 @@ def forward(self, x):
         return x_tilde, kl_div
 
 
-class VectorQuantizedAE(nn.Module):
+class VQEmbedding(nn.Module):
+    def __init__(self, K, D):
+        super().__init__()
+        self.embedding = nn.Embedding(K, D)
+        self.embedding.weight.data.uniform_(-1./K, 1./K)
+
+    def forward(self, z_e_x):
+        # z_e_x - (B, D, H, W)
+        # emb - (K, D)
+
+        emb = self.embedding.weight
+        dists = torch.pow(
+            z_e_x.unsqueeze(1) - emb[None, :, :, None, None],
+            2
+        ).sum(2)
+
+        latents = dists.min(1)[1]
+        return latents
+
+
+class ResBlock(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.block = nn.Sequential(
+            nn.ReLU(True),
+            nn.Conv2d(dim, dim, 3, 1, 1),
+            nn.BatchNorm2d(dim),
+            nn.ReLU(True),
+            nn.Conv2d(dim, dim, 1),
+            nn.BatchNorm2d(dim)
+        )
+
+    def forward(self, x):
+        return x + self.block(x)
+
+
+class VectorQuantizedVAE(nn.Module):
     def __init__(self, input_dim, dim, K=512):
         super().__init__()
         self.encoder = nn.Sequential(
             nn.Conv2d(input_dim, dim, 4, 2, 1),
+            nn.BatchNorm2d(dim),
             nn.ReLU(True),
             nn.Conv2d(dim, dim, 4, 2, 1),
             ResBlock(dim),
@@ -108,6 +111,7 @@ def __init__(self, input_dim, dim, K=512):
             ResBlock(dim),
             nn.ReLU(True),
             nn.ConvTranspose2d(dim, dim, 4, 2, 1),
+            nn.BatchNorm2d(dim),
             nn.ReLU(True),
             nn.ConvTranspose2d(dim, input_dim, 4, 2, 1),
             nn.Tanh()
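
Note: the relocated VQEmbedding is unchanged apart from its position in the file; it still performs a plain nearest-neighbour lookup against the codebook. A minimal standalone sketch of that lookup follows. Batch size and spatial size here are illustrative assumptions; K and D mirror the values configured in vqvae.py.

import torch
import torch.nn as nn

K, D = 512, 256
embedding = nn.Embedding(K, D)
embedding.weight.data.uniform_(-1. / K, 1. / K)

z_e_x = torch.randn(8, D, 7, 7)   # encoder output: (B, D, H, W); shapes assumed
emb = embedding.weight            # codebook: (K, D)

# Squared L2 distance from every spatial position to every code,
# then argmin over the K axis gives the discrete latent indices.
dists = torch.pow(
    z_e_x.unsqueeze(1) - emb[None, :, :, None, None],  # broadcasts to (B, K, D, H, W)
    2
).sum(2)                          # (B, K, H, W)
latents = dists.min(1)[1]         # (B, H, W), integer codes in [0, K)
print(latents.shape)              # torch.Size([8, 7, 7])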

pixelcnn_baseline.py

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@
 DATASET = 'FashionMNIST' # CIFAR10 | MNIST | FashionMNIST
 NUM_WORKERS = 4
 
-LATENT_SHAPE = (28, 28) # (8, 8) -> 32x32 images, (7, 7) -> 28x28 images
+IMAGE_SHAPE = (28, 28) # (32, 32) | (28, 28)
 INPUT_DIM = 3 # 3 (RGB) | 1 (Grayscale)
 K = 256
 DIM = 64
@@ -100,7 +100,7 @@ def generate_samples():
     label = torch.arange(10).expand(10, 10).contiguous().view(-1)
     label = label.long().cuda()
 
-    x_tilde = model.generate(label, shape=LATENT_SHAPE, batch_size=100)
+    x_tilde = model.generate(label, shape=IMAGE_SHAPE, batch_size=100)
     images = x_tilde.cpu().data.float() / (K - 1)
 
     save_image(
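
Note: the rename from LATENT_SHAPE to IMAGE_SHAPE reflects that the baseline PixelCNN models pixels directly, so the sampling grid is the image resolution itself rather than a latent grid. A sketch of the call site after the rename, assuming model, label, and K are set up as in this script:

IMAGE_SHAPE = (28, 28) # (32, 32) | (28, 28)
x_tilde = model.generate(label, shape=IMAGE_SHAPE, batch_size=100)
images = x_tilde.cpu().data.float() / (K - 1)  # map integer levels in [0, K) to [0, 1]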

pixelcnn_prior.py

Lines changed: 7 additions & 24 deletions
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 from torchvision import datasets, transforms
-from modules import AutoEncoder, GatedPixelCNN, to_scalar
+from modules import VectorQuantizedVAE, GatedPixelCNN
 import numpy as np
 from torchvision.utils import save_image
 import time
@@ -11,11 +11,11 @@
 N_EPOCHS = 100
 PRINT_INTERVAL = 100
 ALWAYS_SAVE = True
-DATASET = 'CIFAR10' # CIFAR10 | MNIST | FashionMNIST
+DATASET = 'MNIST' # CIFAR10 | MNIST | FashionMNIST
 NUM_WORKERS = 4
 
-LATENT_SHAPE = (8, 8) # (8, 8) -> 32x32 images, (7, 7) -> 28x28 images
-INPUT_DIM = 3 # 3 (RGB) | 1 (Grayscale)
+LATENT_SHAPE = (7, 7) # (8, 8) -> 32x32 images, (7, 7) -> 28x28 images
+INPUT_DIM = 1 # 3 (RGB) | 1 (Grayscale)
 DIM = 64
 VAE_DIM = 256
 N_LAYERS = 15
@@ -43,7 +43,7 @@
     num_workers=NUM_WORKERS, pin_memory=True
 )
 
-autoencoder = AutoEncoder(INPUT_DIM, VAE_DIM, K).to(DEVICE)
+autoencoder = VectorQuantizedVAE(INPUT_DIM, VAE_DIM, K).to(DEVICE)
 autoencoder.load_state_dict(
     torch.load('models/{}_vqvae.pt'.format(DATASET))
 )
@@ -78,7 +78,7 @@ def train():
         loss.backward()
         opt.step()
 
-        train_loss.append(to_scalar(loss))
+        train_loss.append(loss.item())
 
         if (batch_idx + 1) % PRINT_INTERVAL == 0:
             print('\tIter: [{}/{} ({:.0f}%)]\tLoss: {} Time: {}'.format(
@@ -104,7 +104,7 @@ def test():
             logits.view(-1, K),
             latents.view(-1)
        )
-        val_loss.append(to_scalar(loss))
+        val_loss.append(loss.item())
 
     print('Validation Completed!\tLoss: {} Time: {}'.format(
         np.asarray(val_loss).mean(0),
@@ -128,25 +128,8 @@ def generate_samples():
     )
 
 
-def generate_reconstructions():
-    x, _ = test_loader.__iter__().next()
-    x = x[:32].to(DEVICE)
-
-    latents, _ = autoencoder.encode(x)
-    x_tilde, _ = autoencoder.decode(latents)
-    x_cat = torch.cat([x, x_tilde], 0)
-    images = (x_cat.cpu().data + 1) / 2
-
-    save_image(
-        images,
-        'samples/reconstructions_{}.png'.format(DATASET),
-        nrow=8
-    )
-
-
 BEST_LOSS = 999
 LAST_SAVED = -1
-generate_reconstructions()
 for epoch in range(1, N_EPOCHS):
     print("\nEpoch {}:".format(epoch))
     train()
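
Note: replacing the custom to_scalar helper with Tensor.item() relies on standard PyTorch behaviour: item() returns the contents of a one-element tensor as a plain Python number, detached from the autograd graph. A quick illustration:

import torch

loss = torch.tensor(0.4375)  # stand-in for a scalar training loss
val = loss.item()            # plain Python float, no graph attached
print(type(val), val)        # <class 'float'> 0.4375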

vqvae.py

Lines changed: 3 additions & 3 deletions
@@ -8,7 +8,7 @@
 from torchvision.utils import save_image
 from torch.distributions.normal import Normal
 
-from modules import VectorQuantizedAE, to_scalar
+from modules import VectorQuantizedVAE, to_scalar
 
 
 BATCH_SIZE = 32
@@ -21,7 +21,7 @@
 DIM = 256
 K = 512
 LAMDA = 1
-LR = 3e-4
+LR = 1e-3
 
 DEVICE = torch.device('cuda') # torch.device('cpu')
 
@@ -49,7 +49,7 @@
     num_workers=NUM_WORKERS, pin_memory=True
 )
 
-model = VectorQuantizedAE(INPUT_DIM, DIM, K).to(DEVICE)
+model = VectorQuantizedVAE(INPUT_DIM, DIM, K).to(DEVICE)
 print(model)
 opt = torch.optim.Adam(model.parameters(), lr=LR, amsgrad=True)
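
Note: a minimal sketch of the model setup under the new class name. DIM, K, and LR follow the constants shown in this file; INPUT_DIM is an assumed value, since its definition is not part of this diff.

import torch
from modules import VectorQuantizedVAE

INPUT_DIM = 3               # assumption: 3 for RGB input, 1 for grayscale
DIM, K, LR = 256, 512, 1e-3

device = torch.device('cuda')  # torch.device('cpu')
model = VectorQuantizedVAE(INPUT_DIM, DIM, K).to(device)
opt = torch.optim.Adam(model.parameters(), lr=LR, amsgrad=True)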
