pushing initial code

ritheshkumar95 · ritheshkumar95 · commit 3a2ad711cf6a · 2018-04-19T17:29:29.000-04:00
diff --git a/main.py b/main.py
@@ -0,0 +1,79 @@
+import torch
+import torch.nn.functional as F
+from torchvision import datasets, transforms
+from modules import AutoEncoder, to_scalar
+from torch.autograd import Variable
+import numpy as np
+from torchvision.utils import save_image
+import time
+
+
+kwargs = {'num_workers': 2, 'pin_memory': True}
+train_loader = torch.utils.data.DataLoader(
+    datasets.FashionMNIST(
+        'data/FashionMNIST/', train=True, download=True,
+        transform=transforms.ToTensor()
+        ), batch_size=64, shuffle=False, **kwargs
+    )
+
+test_loader = torch.utils.data.DataLoader(
+    datasets.FashionMNIST(
+        'data/FashionMNIST/', train=False,
+        transform=transforms.ToTensor()
+    ), batch_size=32, shuffle=False, **kwargs
+)
+test_data = list(test_loader)
+
+model = AutoEncoder().cuda()
+opt = torch.optim.Adam(model.parameters(), lr=3e-4)
+
+
+def train(epoch):
+    train_loss = []
+    for batch_idx, (data, _) in enumerate(train_loader):
+        start_time = time.time()
+        x = Variable(data, requires_grad=False).cuda()
+
+        opt.zero_grad()
+
+        x_tilde, z_e_x, z_q_x = model(x)
+        z_q_x.retain_grad()
+
+        loss_recons = F.binary_cross_entropy(x_tilde, x)
+        loss_recons.backward(retain_graph=True)
+
+        # Straight-through estimator
+        z_e_x.backward(z_q_x.grad, retain_graph=True)
+
+        # Vector quantization objective
+        loss_vq = F.mse_loss(z_q_x, z_e_x.detach())
+        loss_vq.backward(retain_graph=True)
+
+        # Commitment objective
+        loss_commit = 0.25 * F.mse_loss(z_e_x, z_q_x.detach())
+        loss_commit.backward()
+        opt.step()
+
+        train_loss.append(to_scalar([loss_recons, loss_vq]))
+
+        if (batch_idx + 1) % 100 == 0:
+            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {} Time: {}'.format(
+                epoch, batch_idx * len(data), len(train_loader.dataset),
+                100. * batch_idx / len(train_loader),
+                np.asarray(train_loss)[-100:].mean(0),
+                time.time() - start_time
+            ))
+
+
+def test():
+    x = Variable(test_data[0][0]).cuda()
+    x_tilde, _, _ = model(x)
+
+    x_cat = torch.cat([x, x_tilde], 0)
+    images = x_cat.cpu().data
+    save_image(images, './sample_fashion_mnist.png', nrow=8)
+
+
+for i in range(100):
+    train(i)
+    test()
diff --git a/modules.py b/modules.py
@@ -0,0 +1,59 @@
+import torch
+import torch.nn as nn
+
+
+def to_scalar(arr):
+    if type(arr) == list:
+        return [x.cpu().data.tolist()[0] for x in arr]
+    else:
+        return arr.cpu().data.tolist()[0]
+
+
+def euclidean_distance(z_e_x, emb):
+    dists = torch.pow(
+        z_e_x.unsqueeze(1) - emb[None, :, :, None, None],
+        2
+    ).sum(2)
+    return dists
+
+
+class AutoEncoder(nn.Module):
+    def __init__(self):
+        super(AutoEncoder, self).__init__()
+        self.encoder = nn.Sequential(
+            nn.Conv2d(1, 16, 4, 2, 1),
+            nn.BatchNorm2d(16),
+            nn.ReLU(True),
+            nn.Conv2d(16, 32, 4, 2, 1),
+            nn.BatchNorm2d(32),
+            nn.ReLU(True),
+            nn.Conv2d(32, 64, 1, 1, 0),
+            nn.BatchNorm2d(64),
+        )
+
+        self.embedding = nn.Embedding(512, 64)
+
+        self.decoder = nn.Sequential(
+            nn.Conv2d(64, 32, 1, 1, 0),
+            nn.BatchNorm2d(32),
+            nn.ReLU(True),
+            nn.ConvTranspose2d(32, 16, 4, 2, 1),
+            nn.BatchNorm2d(16),
+            nn.ReLU(True),
+            nn.ConvTranspose2d(16, 1, 4, 2, 1),
+            nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        z_e_x = self.encoder(x)
+        B, C, H, W = z_e_x.size()
+
+        dists = euclidean_distance(z_e_x, self.embedding.weight)
+        latents = dists.min(1)[1]
+
+        shp = latents.size() + (-1, )
+        z_q_x = self.embedding(latents.view(-1)).view(*shp)
+        z_q_x = z_q_x.permute(0, 3, 1, 2)
+
+        x_tilde = self.decoder(z_q_x)
+        return x_tilde, z_e_x, z_q_x
diff --git a/sample_fashion_mnist.png b/sample_fashion_mnist.png