CIFAR-10 sort of works: [-1, 1] input normalization, Tanh decoder output, 15-layer PixelCNN

ritheshkumar95 · ritheshkumar95 · commit 64f4b56aaa16 · 2018-04-26T09:17:27.000-04:00
diff --git a/main.py b/main.py
@@ -12,14 +12,14 @@
 NUM_WORKERS = 4
 LR = 2e-4
 K = 512
-LAMDA = 0.25
+LAMDA = 1
 PRINT_INTERVAL = 100
 N_EPOCHS = 100
 
 
 preproc_transform = transforms.Compose([
     transforms.ToTensor(),
-    # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
 ])
 train_loader = torch.utils.data.DataLoader(
     datasets.CIFAR10(
@@ -102,7 +102,7 @@ def generate_samples():
     x_tilde, _, _ = model(x)
 
     x_cat = torch.cat([x, x_tilde], 0)
-    images = x_cat.cpu().data
+    images = (x_cat.cpu().data + 1) / 2
     save_image(images, './sample_cifar.png', nrow=8)
 
 
diff --git a/modules.py b/modules.py
@@ -6,15 +6,15 @@
 
 def to_scalar(arr):
     if type(arr) == list:
-        return [x.cpu().data.tolist()[0] for x in arr]
+        return [x.item() for x in arr]
     else:
-        return arr.cpu().data.tolist()[0]
+        return arr.item()
 
 
 def weights_init(m):
     classname = m.__class__.__name__
     if classname.find('Conv') != -1:
-        nn.init.xavier_uniform(m.weight.data)
+        nn.init.xavier_uniform_(m.weight.data)
         m.bias.data.fill_(0)
 
 
@@ -25,7 +25,7 @@ def __init__(self, dim):
             nn.ReLU(True),
             nn.Conv2d(dim, dim, 3, 1, 1),
             nn.ReLU(True),
-            nn.Conv2d(dim, dim, 1)
+            nn.Conv2d(dim, dim, 1),
         )
 
     def forward(self, x):
@@ -39,21 +39,22 @@ def __init__(self, K=512):
             nn.Conv2d(3, 256, 4, 2, 1),
             nn.ReLU(True),
             nn.Conv2d(256, 256, 4, 2, 1),
-            nn.ReLU(True),
             ResBlock(256),
             ResBlock(256),
         )
 
         self.embedding = nn.Embedding(K, 256)
-        self.embedding.weight.data.copy_(1./K * torch.randn(K, 256))
+        # self.embedding.weight.data.copy_(1./K * torch.randn(K, 256))
+        self.embedding.weight.data.uniform_(-1./K, 1./K)
 
         self.decoder = nn.Sequential(
             ResBlock(256),
             ResBlock(256),
+            nn.ReLU(True),
             nn.ConvTranspose2d(256, 256, 4, 2, 1),
             nn.ReLU(True),
             nn.ConvTranspose2d(256, 3, 4, 2, 1),
-            nn.Sigmoid()
+            nn.Tanh()
         )
 
         self.apply(weights_init)
@@ -145,7 +146,7 @@ def forward(self, x_v, x_h):
 
 
 class GatedPixelCNN(nn.Module):
-    def __init__(self, input_dim=256, dim=64, n_layers=7):
+    def __init__(self, input_dim=256, dim=64, n_layers=15):
         super().__init__()
         self.dim = 64
 
diff --git a/pixelcnn.py b/pixelcnn.py
@@ -10,15 +10,18 @@
 
 BATCH_SIZE = 64
 NUM_WORKERS = 4
-LR = 1e-3
-K = 256
+LR = 3e-4
+K = 512
+DIM = 64
+N_LAYERS = 15
 PRINT_INTERVAL = 100
 N_EPOCHS = 100
+ALWAYS_SAVE = True
 
 
 preproc_transform = transforms.Compose([
     transforms.ToTensor(),
-    # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
 ])
 train_loader = torch.utils.data.DataLoader(
     datasets.CIFAR10(
@@ -37,10 +40,12 @@
 )
 
 autoencoder = AutoEncoder(K).cuda()
-autoencoder.load_state_dict(torch.load('best_autoencoder.pt'))
+autoencoder.load_state_dict(
+    torch.load('best_autoencoder.pt')
+)
 autoencoder.eval()
 
-model = GatedPixelCNN().cuda()
+model = GatedPixelCNN(K, DIM, N_LAYERS).cuda()
 criterion = nn.CrossEntropyLoss().cuda()
 opt = torch.optim.Adam(model.parameters(), lr=LR)
 
@@ -104,7 +109,7 @@ def test():
 def generate_samples():
     latents = model.generate()
     x_tilde, _ = autoencoder.decode(latents)
-    images = x_tilde.cpu().data
+    images = (x_tilde.cpu().data + 1) / 2
     save_image(images, './sample_pixelcnn_cifar.png', nrow=8)
 
 
@@ -114,7 +119,7 @@ def generate_reconstructions():
     latents, _ = autoencoder.encode(x)
     x_tilde, _ = autoencoder.decode(latents)
     x_cat = torch.cat([x, x_tilde], 0)
-    images = x_cat.cpu().data
+    images = (x_cat.cpu().data + 1) / 2
     save_image(images, './sample_cifar.png', nrow=8)
 
 
@@ -126,7 +131,7 @@ def generate_reconstructions():
     train()
     cur_loss = test()
 
-    if cur_loss <= BEST_LOSS:
+    if ALWAYS_SAVE or cur_loss <= BEST_LOSS:
         BEST_LOSS = cur_loss
         LAST_SAVED = epoch