MandyMo
diff --git a/‎src/Discriminator.py
Lines changed: 56 additions & 80 deletions b/‎src/Discriminator.py
Lines changed: 56 additions & 80 deletions
diff --git a/‎src/HourGlass.py
Lines changed: 8 additions & 1 deletion b/‎src/HourGlass.py
Lines changed: 8 additions & 1 deletion
diff --git a/‎src/LinearModel.py
Lines changed: 9 additions & 1 deletion b/‎src/LinearModel.py
Lines changed: 9 additions & 1 deletion
diff --git a/‎src/PRNetEncoder.py
Lines changed: 101 additions & 0 deletions b/‎src/PRNetEncoder.py
Lines changed: 101 additions & 0 deletions
diff --git a/‎src/Resnet.py
Lines changed: 8 additions & 0 deletions b/‎src/Resnet.py
Lines changed: 8 additions & 0 deletions
@@ -1,13 +1,23 @@
 
+'''
+    file:   Discriminator.py
+
+    date:   2017_04_29
+    author: zhangxiong(1025679612@qq.com)
+'''
+
 from LinearModel import LinearModel
 import config
 import util
 import torch
 import numpy as np
 import torch.nn as nn
 from config import args
-import torch.nn.functional as F
 
+'''
+    the shape discriminator is used to discriminate shape parameters
+    the input is N x 10
+'''
 class ShapeDiscriminator(LinearModel):
     def __init__(self, fc_layers, use_dropout, drop_prob, use_ac_func):
         if fc_layers[-1] != 1:
@@ -19,20 +29,36 @@ def __init__(self, fc_layers, use_dropout, drop_prob, use_ac_func):
     def forward(self, inputs):
         return self.fc_blocks(inputs)
 
-class PoseDiscriminator(LinearModel):
-    def __init__(self, fc_layers, use_dropout, drop_prob, use_ac_func):
-        if fc_layers[-1] != 1:
-            msg = 'the neuron count of the last layer must be 1, but got {}'.format(fc_layers[-1])
+class PoseDiscriminator(nn.Module):  # per-joint discriminator: shared 1x1 convs followed by one linear head per joint
+    def __init__(self, channels):  # channels: layer widths; the last entry must be 1
+        super(PoseDiscriminator, self).__init__()
+
+        if channels[-1] != 1:  # NOTE(review): the sys.exit below needs 'import sys', which the import lines shown for this file do not include -- confirm
+            msg = 'the neuron count of the last layer must be 1, but got {}'.format(channels[-1])
             sys.exit(msg)
 
-        super(PoseDiscriminator, self).__init__(fc_layers, use_dropout, drop_prob, use_ac_func)
+        self.conv_blocks = nn.Sequential()  # stack of 1x1 convs, so every position of the input is transformed independently
+        l = len(channels)
+        for idx in range(l - 2):  # one conv per consecutive pair in channels[:-1]; no activation is added between them
+            self.conv_blocks.add_module(
+                name = 'conv_{}'.format(idx),
+                module = nn.Conv2d(in_channels = channels[idx], out_channels = channels[idx + 1], kernel_size = 1, stride = 1)
+            )
 
+        self.fc_layer = nn.ModuleList()  # 23 independent heads, each mapping channels[l - 2] features to 1 value
+        for idx in range(23):  # NOTE(review): the joint count 23 is hard-coded here and in forward
+            self.fc_layer.append(nn.Linear(in_features = channels[l - 2], out_features = 1))
+
+    # inputs: N x 23 x 9; returns (the 23 head outputs concatenated along dim 1, conv features N x c x 1 x 23)
     def forward(self, inputs):
-        '''
-        x = self.fc_blocks(inputs)
-        return [x, self.last_block(x)]
-        '''
-        return self.fc_blocks(inputs)
+        batch_size = inputs.shape[0]  # NOTE(review): not used below
+        inputs = inputs.transpose(1, 2).unsqueeze(2) # to N x 9 x 1 x 23
+        internal_outputs = self.conv_blocks(inputs) # to N x c x 1 x 23
+        o = []
+        for idx in range(23):
+            o.append(self.fc_layer[idx](internal_outputs[:,:,0,idx]))  # head idx reads column idx of the conv features
+        
+        return torch.cat(o, 1), internal_outputs  # (per-joint values, intermediate conv features)
 
 class FullPoseDiscriminator(LinearModel):
     def __init__(self, fc_layers, use_dropout, drop_prob, use_ac_func):
@@ -64,21 +90,16 @@ def _create_sub_modules(self):
         '''
             create theta discriminator for 23 joint
         '''
-        fc_layers = [9, 32, 32, 1]
-        use_dropout = [False, False, False]
-        drop_prob = [0.5, 0.5, 0.5]
-        use_ac_func = [True, True, False]
-        self.pose_discriminators = nn.ModuleList()
-        for _ in range(self.joint_count - 1):
-            self.pose_discriminators.append(PoseDiscriminator(fc_layers, use_dropout, drop_prob, use_ac_func))
+
+        self.pose_discriminator = PoseDiscriminator([9, 32, 32, 1])
 
         '''
             create full pose discriminator for total 23 joints
         '''
-        fc_layers = [(self.joint_count - 1) * 9, 1024, 1024, 1024, 1]
-        use_dropout = [False, False, False, False]
-        drop_prob = [0.5, 0.5, 0.5, 0.5]
-        use_ac_func = [True, True, True, False]
+        fc_layers = [23 * 32, 1024, 1024, 1]
+        use_dropout = [False, False, False]
+        drop_prob = [0.5, 0.5, 0.5]
+        use_ac_func = [True, True, False]
         self.full_pose_discriminator = FullPoseDiscriminator(fc_layers, use_dropout, drop_prob, use_ac_func)
 
         '''
@@ -92,67 +113,22 @@ def _create_sub_modules(self):
 
         print('finished create the discriminator modules...')
 
-    '''
-        purpose:
-            calc mean shape discriminator value
-        inputs:
-            real_shape N x 10
-            fake_shape n x 10
-        return:
-            shape discriminator output value
-    '''
-    def calc_shape_disc_value(self, real_shape, fake_shape):
-        shapes = torch.cat([real_shape, fake_shape], dim = 0)
-        return self.shape_discriminator(shapes)
-
-    '''
-        inputs:
-            real_pose N x 24 x 3
-            fake_pose n x 24 x 3
-        return:
-            pose discriminator output value
-    '''
-    def calc_pose_disc_value(self, real_pose, fake_pose):
-        real_pose = util.batch_rodrigues(real_pose.view(-1, 3)).view(-1, 24, 9)
-        fake_pose = util.batch_rodrigues(fake_pose.view(-1, 3)).view(-1, 24, 9)
-        poses = torch.cat((real_pose[:, 1:, :], fake_pose[:, 1:, :]), dim = 0)
-        full_pose_dis_value = self.full_pose_discriminator(poses.view(-1, 23 * 9))
-        poses = torch.transpose(poses, 0, 1)
-        theta_disc_values = []
-        for _ in range(23):
-            theta_disc_values.append(
-                self.pose_discriminators[_](poses[_, :, :])
-            )
-        pose_dis_value = torch.cat(theta_disc_values, dim = 1)
-        return torch.cat([pose_dis_value, full_pose_dis_value], dim = 1)        
 
     '''
-        inputs:
-            real_thetas N x 85
-            fake_thetas N x 85
-        return
-            pose & full pose & shape disc value N x (23 + 1 + 1)
+        inputs: thetas, N x 85 (3 camera + 72 pose + 10 shape)
     '''
-    def calc_thetas_disc_value(self, real_thetas, fake_thetas):
-        real_poses, fake_poses = real_thetas[:, 3:75], fake_thetas[:, 3:75]
-        real_shapes, fake_shapes = real_thetas[:, 75:], fake_thetas[:, 75:]
-        pose_disc_value = self.calc_pose_disc_value(real_poses.contiguous(), fake_poses.contiguous())
-        shape_disc_value = self.calc_shape_disc_value(real_shapes.contiguous(), fake_shapes.contiguous())
-        return torch.cat([pose_disc_value, shape_disc_value], dim = 1)
-
-    def forward(self, real_thetas, fake_thetas):
-        if config.args.normalize_disc:
-            return F.sigmoid(self.calc_thetas_disc_value(real_thetas, fake_thetas))
-        else:
-            return self.calc_thetas_disc_value(real_thetas, fake_thetas)
-
+    def forward(self, thetas):  # returns the pose, full-pose and shape discriminator values concatenated along dim 1
+        batch_size = thetas.shape[0]
+        cams, poses, shapes = thetas[:, :3], thetas[:, 3:75], thetas[:, 75:]  # cams is split off but not used below
+        shape_disc_value = self.shape_discriminator(shapes)
+        rotate_matrixs = util.batch_rodrigues(poses.contiguous().view(-1, 3)).view(-1, 24, 9)[:, 1:, :]  # one 3-vector per joint -> 9 values per joint (presumably flattened rotation matrices -- confirm in util); [:, 1:, :] drops joint 0, leaving N x 23 x 9
+        pose_disc_value, pose_inter_disc_value = self.pose_discriminator(rotate_matrixs)  # second output is the intermediate conv feature map
+        full_pose_disc_value = self.full_pose_discriminator(pose_inter_disc_value.contiguous().view(batch_size, -1))  # conv features flattened to one row per sample
+        return torch.cat((pose_disc_value, full_pose_disc_value, shape_disc_value), 1)  # presumably N x (23 + 1 + 1) -- confirm against the sub-discriminators' outputs
 
 if __name__ == '__main__':
     device = torch.device('cuda')
-    net = Discriminator().to(device)
-    real = torch.zeros((100, 85)).float().to(device)
-    fake = torch.ones((200, 85)).float().to(device)
-
-    dis_v = net(real, fake)
-    print(dis_v.device)
-    print(dis_v.shape)
+    net = Discriminator()  # NOTE(review): no longer moved to device, so the device assignment above is now unused
+    inputs = torch.ones((100, 85))  # dummy batch of 100 theta vectors
+    disc_value = net(inputs)  # smoke-test the forward pass; the result is not printed
+    print(net)
@@ -1,4 +1,11 @@
 
+'''
+    file:   HourGlass.py
+
+    date:   2018_05_12
+    author: zhangxiong(1025679612@qq.com)
+'''
+
 from __future__ import print_function
 import numpy as np
 import torch
@@ -208,4 +215,4 @@ def _create_hourglass_net():
         nChannels = 256,
         nJointCount = 1,
         bUseBn = True,
-    )
+    )
@@ -1,4 +1,12 @@
 
+
+'''
+    file:   LinearModel.py
+
+    date:   2018_04_29
+    author: zhangxiong(1025679612@qq.com)
+'''
+
 import torch.nn as nn
 import numpy as np
 import sys
@@ -93,4 +101,4 @@ def forward(self, inputs):
     net = LinearModel(fc_layers, use_dropout, drop_prob, use_ac_func).to(device)
     print(net)
     nx = np.zeros([2, 2048])
-    vx = torch.from_numpy(nx).to(device)
+    vx = torch.from_numpy(nx).to(device)
@@ -0,0 +1,101 @@
+
+
+'''
+    file:   PRNetEncoder.py
+
+    date:   2018_05_22
+    author: zhangxiong(1025679612@qq.com)
+    mark:   the algorithm is taken from the PRNet code
+'''
+
+from __future__ import print_function
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class Residual(nn.Module):  # bottleneck block: 1x1 conv down -> k x k conv -> 1x1 conv up, plus a skip connection
+    def __init__(self, use_bn, input_channels, out_channels, mid_channels, kernel_size = 3, padding = 1, stride = 1):  # use_bn toggles the three BatchNorm2d layers
+        super(Residual, self).__init__()
+        self.use_bn = use_bn
+        self.out_channels   = out_channels
+        self.input_channels = input_channels
+        self.mid_channels   = mid_channels
+
+        self.down_channel = nn.Conv2d(input_channels, self.mid_channels, kernel_size = 1)  # 1x1 conv: input_channels -> mid_channels
+        self.AcFunc       = nn.ReLU()  # one ReLU module reused after every stage
+        if use_bn:
+            self.bn_0 = nn.BatchNorm2d(num_features = self.mid_channels)
+            self.bn_1 = nn.BatchNorm2d(num_features = self.mid_channels)
+            self.bn_2 = nn.BatchNorm2d(num_features = self.out_channels)
+
+        self.conv = nn.Conv2d(self.mid_channels, self.mid_channels, kernel_size = kernel_size, padding = padding, stride = stride)  # NOTE(review): the skip path applies no stride, so stride != 1 looks like it would make the addition in forward fail -- confirm
+
+        self.up_channel = nn.Conv2d(self.mid_channels, out_channels, kernel_size= 1)  # 1x1 conv: mid_channels -> out_channels
+
+        if input_channels != out_channels:  # project the skip path with a 1x1 conv when the channel counts differ
+            self.trans = nn.Conv2d(input_channels, out_channels, kernel_size = 1)
+    
+    def forward(self, inputs):  # returns ReLU of (main path + skip), with bn_2 applied to the sum when use_bn is set
+        x = self.down_channel(inputs)
+        if self.use_bn:
+            x = self.bn_0(x)
+        x = self.AcFunc(x)
+
+        x = self.conv(x)
+        if self.use_bn:
+            x = self.bn_1(x)
+        x = self.AcFunc(x)
+
+        x = self.up_channel(x)
+
+        if self.input_channels != self.out_channels:
+            x += self.trans(inputs)  # in-place add of the projected input
+        else:
+            x += inputs  # identity skip
+
+        if self.use_bn:  # normalisation is applied after the skip addition
+            x = self.bn_2(x)
+        
+        return self.AcFunc(x)
+
+class PRNetEncoder(nn.Module):  # conv encoder: two plain convs, then Residual blocks interleaved with seven 2x2 max-pools
+    def __init__(self):
+        super(PRNetEncoder, self).__init__()
+        self.conv_blocks = nn.Sequential(  # the per-layer shape comments below are written as H x W x C for 256 x 256 inputs
+            nn.Conv2d(in_channels = 3, out_channels = 8, kernel_size = 3, stride = 1, padding = 1), # to 256 x 256 x 8
+            nn.Conv2d(in_channels = 8, out_channels = 16, kernel_size = 3, stride = 1, padding = 1), # to 256 x 256 x 16
+            Residual(use_bn = True, input_channels = 16, out_channels = 32, mid_channels = 16, stride = 1, padding = 1), # to 256 x 256 x 32
+            nn.MaxPool2d(kernel_size = 2, stride = 2), # to 128 x 128 x 32
+            Residual(use_bn = True, input_channels = 32, out_channels = 32, mid_channels = 16, stride = 1, padding = 1), # to 128 x 128 x 32
+            Residual(use_bn = True, input_channels = 32, out_channels = 32, mid_channels = 16, stride = 1, padding = 1), # to 128 x 128 x 32
+            Residual(use_bn = True, input_channels = 32, out_channels = 64, mid_channels = 32, stride = 1, padding = 1), # to 128 x 128 x 64
+            nn.MaxPool2d(kernel_size = 2, stride = 2), # to 64 x 64 x 64
+            Residual(use_bn = True, input_channels = 64, out_channels = 64, mid_channels = 32, stride = 1, padding = 1), # to 64 x 64 x 64
+            Residual(use_bn = True, input_channels = 64, out_channels = 64, mid_channels = 32, stride = 1, padding = 1), # to 64 x 64 x 64
+            Residual(use_bn = True, input_channels = 64, out_channels = 128, mid_channels = 64, stride = 1, padding = 1), # to 64 x 64 x 128
+            nn.MaxPool2d(kernel_size = 2, stride = 2), # to 32 x 32 x 128
+            Residual(use_bn = True, input_channels = 128, out_channels = 128, mid_channels = 64, stride = 1, padding = 1), # to 32 x 32 x 128
+            Residual(use_bn = True, input_channels = 128, out_channels = 128, mid_channels = 64, stride = 1, padding = 1), # to 32 x 32 x 128
+            Residual(use_bn = True, input_channels = 128, out_channels = 256, mid_channels = 128, stride = 1, padding = 1), # to 32 x 32 x 256
+            nn.MaxPool2d(kernel_size = 2, stride = 2), # to 16 x 16 x 256
+            Residual(use_bn = True, input_channels = 256, out_channels = 256, mid_channels = 128, stride = 1, padding = 1), # to 16 x 16 x 256
+            Residual(use_bn = True, input_channels = 256, out_channels = 256, mid_channels = 128, stride = 1, padding = 1), # to 16 x 16 x 256
+            Residual(use_bn = True, input_channels = 256, out_channels = 512, mid_channels = 256, stride = 1, padding = 1), # to 16 x 16 x 512
+            nn.MaxPool2d(kernel_size = 2, stride = 2), # to 8 x 8 x 512
+            Residual(use_bn = True, input_channels = 512, out_channels = 512, mid_channels = 256, stride = 1, padding = 1), # to 8 x 8 x 512
+            nn.MaxPool2d(kernel_size = 2, stride = 2) , # to 4 x 4 x 512
+            Residual(use_bn = True, input_channels = 512, out_channels = 512, mid_channels = 256, stride = 1, padding = 1), # to 4 x 4 x 512
+            nn.MaxPool2d(kernel_size = 2, stride = 2), # to 2 x 2 x 512
+            Residual(use_bn = True, input_channels = 512, out_channels = 512, mid_channels = 256, stride = 1, padding = 1) # to 2 x 2 x 512
+        )
+    
+    def forward(self, inputs):  # inputs: image batch with 3 channels (first conv has in_channels = 3)
+        return self.conv_blocks(inputs).view(-1, 2048)  # rows of 2048 = 2 x 2 x 512 for 256 x 256 inputs; NOTE(review): other input sizes may silently change the number of rows instead of raising
+
+
+if __name__ == '__main__':  # smoke test: run the encoder once on a dummy batch
+    net = PRNetEncoder()
+    inputs = torch.ones(size = (10, 3, 256, 256)).float()  # 10 dummy 3-channel 256 x 256 images
+    r = net(inputs)
+    print(r.shape)  # expected to be (10, 2048) given the layer shape comments in PRNetEncoder
@@ -1,4 +1,12 @@
 
+'''
+    file:   Resnet.py
+
+    date:   2018_05_02
+    author: zhangxiong(1025679612@qq.com)
+    mark:   copied from pytorch source code
+'''
+
 import torch.nn as nn
 import torch.nn.functional as F
 import torch