diff --git a/Makefile b/Makefile
index 25be7c891..c75b3eabf 100644
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,10 @@ else
 	LDFLAGS += -Wl,-rpath=$(CAFFE_DIR)/lib
 	INCLUDE_DIRS += $(CAFFE_DIR)/include
 	LIBRARY_DIRS += $(CAFFE_DIR)/lib
+        # Compile the code guarded by `#ifdef USE_TENSORRT` only when DEEP_NET is set to `tensorrt`
+        ifeq ($(DEEP_NET), tensorrt)
+                COMMON_FLAGS += -DUSE_TENSORRT
+        endif
 endif
 
 ##############################
@@ -145,6 +149,11 @@ ifeq ($(USE_CUDA), 1)
 	LIBRARIES += cudart cublas curand
 endif
 
+# TensorRT: link nvinfer and nvcaffe_parser only when DEEP_NET is set to `tensorrt`
+ifeq ($(DEEP_NET), tensorrt)
+        LIBRARIES += nvinfer nvcaffe_parser
+endif
+
 # LIBRARIES += glog gflags boost_system boost_filesystem m hdf5_hl hdf5 caffe
 LIBRARIES += glog gflags boost_system boost_filesystem m hdf5_hl hdf5
 
diff --git a/examples/tutorial_pose/1_extract_from_image.cpp b/examples/tutorial_pose/1_extract_from_image.cpp
index 461dd0bcc..d975ee00d 100644
--- a/examples/tutorial_pose/1_extract_from_image.cpp
+++ b/examples/tutorial_pose/1_extract_from_image.cpp
@@ -59,10 +59,29 @@ DEFINE_double(render_threshold,         0.05,           "Only estimated keypoint
 DEFINE_double(alpha_pose,               0.6,            "Blending factor (range 0-1) for the body part rendering. 1 will show it completely, 0 will"
                                                         " hide it. Only valid for GPU rendering.");
 
+// Ordered log of (label, timestamp) checkpoints; appended to by timeNow() and reported at the end of the run.
+using OpTimings = std::vector<std::pair<std::string, std::chrono::high_resolution_clock::time_point>>;
+static OpTimings timings;
+
+// Appends a (label, current high_resolution_clock time) checkpoint to the global `timings` log.
+static void timeNow(const std::string& label)
+{
+    timings.emplace_back(label, std::chrono::high_resolution_clock::now());
+}
+
+// Returns the interval (t1 - t2) in milliseconds, formatted by std::to_string and suffixed with " ms".
+static std::string timeDiffToString(const std::chrono::high_resolution_clock::time_point& t1,
+                                    const std::chrono::high_resolution_clock::time_point& t2)
+{ return std::to_string(std::chrono::duration<double>(t1 - t2).count() * 1e3) + " ms"; }
+
+
 int openPoseTutorialPose1()
 {
     op::log("OpenPose Library Tutorial - Example 1.", op::Priority::High);
     // ------------------------- INITIALIZATION -------------------------
+    
+    timeNow("Start");
+
     // Step 1 - Set logging level
         // - 0 will output all the logging messages
         // - 255 will output nothing
@@ -80,7 +99,7 @@ int openPoseTutorialPose1()
     // Check no contradictory flags enabled
     if (FLAGS_alpha_pose < 0. || FLAGS_alpha_pose > 1.)
         op::error("Alpha value for blending must be in the range [0,1].", __LINE__, __FUNCTION__, __FILE__);
-    if (FLAGS_scale_gap <= 0. && FLAGS_scale_number > 1)
+    if (FLAGS_scale_gap <= 0. && FLAGS_scale_number > 1.)
         op::error("Incompatible flag configuration: scale_gap must be greater than 0 or scale_number = 1.",
                   __LINE__, __FUNCTION__, __FILE__);
     // Enabling Google Logging
@@ -101,6 +120,8 @@ int openPoseTutorialPose1()
     poseExtractorCaffe.initializationOnThread();
     poseRenderer.initializationOnThread();
 
+    timeNow("Initialization");
+    
     // ------------------------- POSE ESTIMATION AND RENDERING -------------------------
     // Step 1 - Read and load image, error if empty (possibly wrong path)
     // Alternative: cv::imread(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
@@ -125,13 +146,25 @@ int openPoseTutorialPose1()
     poseRenderer.renderPose(outputArray, poseKeypoints, scaleInputToOutput);
     // Step 6 - OpenPose output format to cv::Mat
     auto outputImage = opOutputToCvMat.formatToCvMat(outputArray);
-
+    timeNow("Step 5");
+    
     // ------------------------- SHOWING RESULT AND CLOSING -------------------------
     // Step 1 - Show results
     frameDisplayer.displayFrame(outputImage, 0); // Alternative: cv::imshow(outputImage) + cv::waitKey(0)
     // Step 2 - Logging information message
     op::log("Example 1 successfully finished.", op::Priority::High);
     // Return successful message
+    // timeDiffToString() already appends the " ms" unit, so the message must not add a second unit.
+    const auto totalTime = timeDiffToString(timings.back().second, timings.front().second);
+    const auto message = "Pose estimation successfully finished. Total time: " + totalTime + ".";
+    op::log(message, op::Priority::High);
+
+    for(OpTimings::iterator timing = timings.begin()+1; timing != timings.end(); ++timing) {
+        const auto log_time = (*timing).first + " - " + timeDiffToString((*timing).second, (*(timing-1)).second);
+        op::log(log_time, op::Priority::High);
+    }
+
+
     return 0;
 }
 
diff --git a/examples/tutorial_pose/3_extract_from_image_TensorRT.cpp b/examples/tutorial_pose/3_extract_from_image_TensorRT.cpp
new file mode 100644
index 000000000..a855fa3da
--- /dev/null
+++ b/examples/tutorial_pose/3_extract_from_image_TensorRT.cpp
@@ -0,0 +1,180 @@
+// ------------------------- OpenPose Library Tutorial - Pose - Example 3 - Extract from Image with TensorRT -------------------------
+// This example shows the user how to:
+// 1. Load an image (`filestream` module)
+// 2. Extract the pose of that image (`pose` module)
+// 3. Render the pose on a resized copy of the input image (`pose` module)
+// 4. Display the rendered pose (`gui` module)
+// In addition to the previous OpenPose modules, we also need to use:
+// 1. `core` module: for the Array<float> class that the `pose` module needs
+// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
+
+// 3rdparty dependencies
+// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
+#include <gflags/gflags.h>
+// Allow Google Flags in Ubuntu 14
+#ifndef GFLAGS_GFLAGS_H_
+namespace gflags = google;
+#endif
+// OpenPose dependencies
+#include <openpose/core/headers.hpp>
+#include <openpose/filestream/headers.hpp>
+#include <openpose/gui/headers.hpp>
+#include <openpose/pose/headers.hpp>
+#include <openpose/utilities/headers.hpp>
+
+// See all the available parameter options with the `--help` flag. E.g. `./build/examples/openpose/openpose.bin --help`.
+// Note: This command will show you flags for other unnecessary 3rdparty files. Check only the flags for the OpenPose
+// executable. E.g. for `openpose.bin`, look for `Flags from examples/openpose/openpose.cpp:`.
+// Debugging/Other: verbosity threshold applied to op::log() messages
+DEFINE_int32(logging_level,             3,              "The logging level. Integer in the range [0, 255]. 0 will output any log() message, while"
+             " 255 will not output any. Current OpenPose library messages are in the range 0-4: 1 for"
+             " low priority messages and 4 for important ones.");
+// Producer: the single input image processed by this example
+DEFINE_string(image_path,               "examples/media/COCO_val2014_000000000192.jpg",     "Process the desired image.");
+// OpenPose: model selection, network/output resolution, GPU index and multi-scale settings
+DEFINE_string(model_pose,               "COCO",         "Model to be used. E.g. `COCO` (18 keypoints), `MPI` (15 keypoints, ~10% faster), "
+              "`MPI_4_layers` (15 keypoints, even faster but less accurate).");
+DEFINE_string(model_folder,             "models/",      "Folder path (absolute or relative) where the models (pose, face, ...) are located.");
+DEFINE_string(net_resolution,           "-1x368",       "Multiples of 16. If it is increased, the accuracy potentially increases. If it is"
+              " decreased, the speed increases. For maximum speed-accuracy balance, it should keep the"
+              " closest aspect ratio possible to the images or videos to be processed. Using `-1` in"
+              " any of the dimensions, OP will choose the optimal aspect ratio depending on the user's"
+              " input value. E.g. the default `-1x368` is equivalent to `656x368` in 16:9 resolutions,"
+              " e.g. full HD (1980x1080) and HD (1280x720) resolutions.");
+DEFINE_string(output_resolution,        "-1x-1",        "The image resolution (display and output). Use \"-1x-1\" to force the program to use the"
+              " input image resolution.");
+DEFINE_int32(num_gpu_start,             0,              "GPU device start number.");
+DEFINE_double(scale_gap,                0.3,            "Scale gap between scales. No effect unless scale_number > 1. Initial scale is always 1."
+              " If you want to change the initial scale, you actually want to multiply the"
+              " `net_resolution` by your desired initial scale.");
+DEFINE_int32(scale_number,              1,              "Number of scales to average.");
+// OpenPose Rendering. NOTE(review): the disable_blending help text lists `alpha_pose` twice - confirm the intended second flag.
+DEFINE_bool(disable_blending,           false,          "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black"
+            " background, instead of being rendered into the original image. Related: `part_to_show`,"
+            " `alpha_pose`, and `alpha_pose`.");
+DEFINE_double(render_threshold,         0.05,           "Only estimated keypoints whose score confidences are higher than this threshold will be"
+              " rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
+              " while small thresholds (~0.1) will also output guessed and occluded keypoints, but also"
+              " more false positives (i.e. wrong detections).");
+DEFINE_double(alpha_pose,               0.6,            "Blending factor (range 0-1) for the body part rendering. 1 will show it completely, 0 will"
+              " hide it. Only valid for GPU rendering.");
+
+
+// Ordered log of (label, timestamp) checkpoints; appended to by timeNow() and reported at the end of the run.
+using OpTimings = std::vector<std::pair<std::string, std::chrono::high_resolution_clock::time_point>>;
+static OpTimings timings;
+
+// Appends a (label, current high_resolution_clock time) checkpoint to the global `timings` log.
+static void timeNow(const std::string& label)
+{
+    timings.emplace_back(label, std::chrono::high_resolution_clock::now());
+}
+
+// Returns the interval (t1 - t2) in milliseconds, formatted by std::to_string and suffixed with " ms".
+static std::string timeDiffToString(const std::chrono::high_resolution_clock::time_point& t1,
+                                    const std::chrono::high_resolution_clock::time_point& t2)
+{ return std::to_string(std::chrono::duration<double>(t1 - t2).count() * 1e3) + " ms"; }
+
+// Runs the single-image pose pipeline using op::PoseExtractorTensorRT and logs how long each stage took.
+// Returns 0 on completion. Without USE_TENSORRT the pipeline is compiled out and only a notice is logged.
+int openPoseTutorialPose3()
+{
+#ifdef USE_TENSORRT
+    op::log("Starting pose estimation.", op::Priority::High);
+    timeNow("Start");
+    op::log("OpenPose Library Tutorial - Pose Example 3.", op::Priority::High);
+    // ------------------------- INITIALIZATION -------------------------
+    // Step 1 - Set logging level
+        // - 0 will output all the logging messages
+        // - 255 will output nothing
+    op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__);
+    op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
+    op::log("", op::Priority::Low, __LINE__, __FUNCTION__, __FILE__);
+    // Step 2 - Read Google flags (user defined configuration)
+    // outputSize
+    const auto outputSize = op::flagsToPoint(FLAGS_output_resolution, "-1x-1");
+    // netInputSize
+    const auto netInputSize = op::flagsToPoint(FLAGS_net_resolution, "-1x368");
+    // poseModel
+    const auto poseModel = op::flagsToPoseModel(FLAGS_model_pose);
+    // Check no contradictory flags enabled
+    if (FLAGS_alpha_pose < 0. || FLAGS_alpha_pose > 1.)
+        op::error("Alpha value for blending must be in the range [0,1].", __LINE__, __FUNCTION__, __FILE__);
+    if (FLAGS_scale_gap <= 0. && FLAGS_scale_number > 1)
+        op::error("Incompatible flag configuration: scale_gap must be greater than 0 or scale_number = 1.", __LINE__, __FUNCTION__, __FILE__);
+    // Enabling Google Logging
+    const bool enableGoogleLogging = true;
+    // Logging
+    op::log("", op::Priority::Low, __LINE__, __FUNCTION__, __FILE__);
+    // Step 3 - Initialize all required classes
+    op::ScaleAndSizeExtractor scaleAndSizeExtractor(netInputSize, outputSize, FLAGS_scale_number, FLAGS_scale_gap);
+    op::CvMatToOpInput cvMatToOpInput;
+    op::CvMatToOpOutput cvMatToOpOutput;
+    op::PoseExtractorTensorRT poseExtractorTensorRT{poseModel, FLAGS_model_folder,
+        FLAGS_num_gpu_start, {}, op::ScaleMode::ZeroToOne, enableGoogleLogging};
+    op::PoseCpuRenderer poseRenderer{poseModel, (float)FLAGS_render_threshold, !FLAGS_disable_blending,
+        (float)FLAGS_alpha_pose};
+    op::OpOutputToCvMat opOutputToCvMat;
+    op::FrameDisplayer frameDisplayer{"OpenPose Tutorial - Example 3", outputSize};
+    // Step 4 - Initialize resources on desired thread (in this case single thread, i.e. we init resources here)
+    poseExtractorTensorRT.initializationOnThread();
+    poseRenderer.initializationOnThread();
+
+    timeNow("Initialization");
+
+    // ------------------------- POSE ESTIMATION AND RENDERING -------------------------
+    // Step 1 - Read and load image, error if empty (possibly wrong path)
+    // Alternative: cv::imread(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
+    cv::Mat inputImage = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
+    if(inputImage.empty())
+        op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
+    const op::Point<int> imageSize{inputImage.cols, inputImage.rows};
+    // Step 2 - Get desired scale sizes
+    std::vector<double> scaleInputToNetInputs;
+    std::vector<op::Point<int>> netInputSizes;
+    double scaleInputToOutput;
+    op::Point<int> outputResolution;
+    std::tie(scaleInputToNetInputs, netInputSizes, scaleInputToOutput, outputResolution)
+    = scaleAndSizeExtractor.extract(imageSize);
+    // Step 3 - Format input image to OpenPose input and output formats
+    const auto netInputArray = cvMatToOpInput.createArray(inputImage, scaleInputToNetInputs, netInputSizes);
+    auto outputArray = cvMatToOpOutput.createArray(inputImage, scaleInputToOutput, outputResolution);
+    // Step 4 - Estimate poseKeypoints
+    poseExtractorTensorRT.forwardPass(netInputArray, imageSize, scaleInputToNetInputs);
+    const auto poseKeypoints = poseExtractorTensorRT.getPoseKeypoints();
+    // Step 5 - Render poseKeypoints
+    poseRenderer.renderPose(outputArray, poseKeypoints, scaleInputToOutput);
+    // Step 6 - OpenPose output format to cv::Mat
+    auto outputImage = opOutputToCvMat.formatToCvMat(outputArray);
+    timeNow("Step 5");
+
+    // ------------------------- SHOWING RESULT AND CLOSING -------------------------
+    // Step 1 - Show results
+    frameDisplayer.displayFrame(outputImage, 0); // Alternative: cv::imshow(outputImage) + cv::waitKey(0)
+    // Step 2 - Logging information message
+    op::log("Example 3 successfully finished.", op::Priority::High);
+    // timeDiffToString() already appends the " ms" unit, so the message must not add a second unit.
+    const auto totalTime = timeDiffToString(timings.back().second, timings.front().second);
+    const auto message = "Pose estimation successfully finished. Total time: " + totalTime + ".";
+    op::log(message, op::Priority::High);
+    // Per-checkpoint durations: each entry is reported relative to the checkpoint recorded before it.
+    for (auto timing = timings.begin() + 1; timing != timings.end(); ++timing) {
+        const auto log_time = timing->first + " - " + timeDiffToString(timing->second, (timing - 1)->second);
+        op::log(log_time, op::Priority::High);
+    }
+#else
+    op::log("OpenPose was built without TensorRT support (USE_TENSORRT undefined); example 3 did nothing.", op::Priority::High);
+#endif // USE_TENSORRT
+    // Return successful message
+    return 0;
+}
+
+int main(int argc, char *argv[])
+{
+    // Parsing command line flags (third argument `true`: gflags removes the flags it recognized from argv)
+    gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+    // Running openPoseTutorialPose3
+    return openPoseTutorialPose3();
+}
+
diff --git a/include/openpose/core/netTensorRT.hpp b/include/openpose/core/netTensorRT.hpp
new file mode 100644
index 000000000..96b588657
--- /dev/null
+++ b/include/openpose/core/netTensorRT.hpp
@@ -0,0 +1,45 @@
+#ifndef OPENPOSE_CORE_NET_TENSORRT_HPP
+#define OPENPOSE_CORE_NET_TENSORRT_HPP
+
+#include <openpose/core/common.hpp>
+#include <openpose/core/net.hpp>
+
+
+#ifdef USE_TENSORRT
+    #include "NvInfer.h"
+#endif
+// NetTensorRT: Net subclass whose TensorRT-specific private helpers are only declared when USE_TENSORRT is defined.
+namespace op
+{
+    class OP_API NetTensorRT : public Net
+    {
+    public:
+        NetTensorRT(const std::string& caffeProto, const std::string& caffeTrainedModel, const int gpuId = 0, const bool enableGoogleLogging = true,
+                 const std::string& lastBlobName = "net_output");
+        // Declared explicitly because of the PIMPL std::unique_ptr member (see the note at the end of the class).
+        virtual ~NetTensorRT();
+
+        void initializationOnThread();
+
+        void forwardPass(const Array<float>& inputNetData) const;
+        // NOTE(review): boost::shared_ptr / caffe::Blob are not included by this header itself - confirm they arrive via common.hpp or net.hpp.
+        boost::shared_ptr<caffe::Blob<float>> getOutputBlob() const;
+
+    private:
+#ifdef USE_TENSORRT
+        nvinfer1::ICudaEngine* caffeToGIEModel();
+
+        nvinfer1::ICudaEngine* createEngine();
+#endif
+        // PIMPL idiom
+        // http://www.cppsamples.com/common-tasks/pimpl.html
+        struct ImplNetTensorRT;
+        std::unique_ptr<ImplNetTensorRT> upImpl;
+
+        // PIMPL requires DELETE_COPY & destructor, or extra code
+        // http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
+        DELETE_COPY(NetTensorRT);
+    };
+}
+
+#endif // OPENPOSE_CORE_NET_TENSORRT_HPP
diff --git a/include/openpose/pose/headers.hpp b/include/openpose/pose/headers.hpp
index e8e28c1eb..9e23af7b7 100644
--- a/include/openpose/pose/headers.hpp
+++ b/include/openpose/pose/headers.hpp
@@ -8,6 +8,7 @@
 #include <openpose/pose/poseCpuRenderer.hpp>
 #include <openpose/pose/poseExtractor.hpp>
 #include <openpose/pose/poseExtractorCaffe.hpp>
+#include <openpose/pose/poseExtractorTensorRT.hpp>
 #include <openpose/pose/poseGpuRenderer.hpp>
 #include <openpose/pose/poseParameters.hpp>
 #include <openpose/pose/poseRenderer.hpp>
diff --git a/include/openpose/pose/poseExtractorTensorRT.hpp b/include/openpose/pose/poseExtractorTensorRT.hpp
new file mode 100644
index 000000000..48f856e70
--- /dev/null
+++ b/include/openpose/pose/poseExtractorTensorRT.hpp
@@ -0,0 +1,45 @@
+#ifndef OPENPOSE_POSE_POSE_EXTRACTOR_TENSORRT_HPP
+#define OPENPOSE_POSE_POSE_EXTRACTOR_TENSORRT_HPP
+
+#include <openpose/core/common.hpp>
+#include <openpose/pose/enumClasses.hpp>
+#include <openpose/pose/poseExtractor.hpp>
+// PoseExtractorTensorRT: PoseExtractor subclass; wrapper.hpp builds it instead of PoseExtractorCaffe when USE_TENSORRT is defined.
+namespace op
+{
+    class OP_API PoseExtractorTensorRT : public PoseExtractor
+    {
+    public:
+        PoseExtractorTensorRT(const PoseModel poseModel, const std::string& modelFolder, const int gpuId,
+                              const std::vector<HeatMapType>& heatMapTypes = {},
+                              const ScaleMode heatMapScale = ScaleMode::ZeroToOne,
+                              const bool enableGoogleLogging = true);
+        // Declared explicitly because of the PIMPL std::unique_ptr member (see the note at the end of the class).
+        virtual ~PoseExtractorTensorRT();
+
+        void netInitializationOnThread();
+        // NOTE: the default `{1.f}` is a float literal; it initializes the std::vector<double> with a single 1.0.
+        void forwardPass(const std::vector<Array<float>>& inputNetData, const Point<int>& inputDataSize,
+                         const std::vector<double>& scaleInputToNetInputs = {1.f});
+
+        const float* getHeatMapCpuConstPtr() const;
+
+        const float* getHeatMapGpuConstPtr() const;
+
+        std::vector<int> getHeatMapSize() const;
+
+        const float* getPoseGpuConstPtr() const;
+
+    private:
+        // PIMPL idiom
+        // http://www.cppsamples.com/common-tasks/pimpl.html
+        struct ImplPoseExtractorTensorRT;
+        std::unique_ptr<ImplPoseExtractorTensorRT> upImpl;
+
+        // PIMPL requires DELETE_COPY & destructor, or extra code
+        // http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
+        DELETE_COPY(PoseExtractorTensorRT);
+    };
+}
+
+#endif // OPENPOSE_POSE_POSE_EXTRACTOR_TENSORRT_HPP
diff --git a/include/openpose/wrapper/wrapper.hpp b/include/openpose/wrapper/wrapper.hpp
index 9c1acb782..4ed04c1c1 100644
--- a/include/openpose/wrapper/wrapper.hpp
+++ b/include/openpose/wrapper/wrapper.hpp
@@ -619,7 +619,11 @@ namespace op
             {
                 // Pose estimators
                 for (auto gpuId = 0; gpuId < gpuNumber; gpuId++)
+#ifdef USE_TENSORRT
+                    poseExtractors.emplace_back(std::make_shared<PoseExtractorTensorRT>(
+#else
                     poseExtractors.emplace_back(std::make_shared<PoseExtractorCaffe>(
+#endif
                         wrapperStructPose.poseModel, modelFolder, gpuId + gpuNumberStart,
                         wrapperStructPose.heatMapTypes, wrapperStructPose.heatMapScale,
                         wrapperStructPose.enableGoogleLogging
diff --git a/models/pose/coco/pose_deploy_linevec.prototxt_192x256 b/models/pose/coco/pose_deploy_linevec.prototxt_192x256
new file mode 100755
index 000000000..99cc4e4fe
--- /dev/null
+++ b/models/pose/coco/pose_deploy_linevec.prototxt_192x256
@@ -0,0 +1,2976 @@
+input: "image"
+input_dim: 1
+input_dim: 3
+input_dim: 192 # This value will be defined at runtime
+input_dim: 256 # This value will be defined at runtime
+layer {
+  name: "conv1_1"
+  type: "Convolution"
+  bottom: "image"
+  top: "conv1_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu1_1"
+  type: "ReLU"
+  bottom: "conv1_1"
+  top: "conv1_1"
+}
+layer {
+  name: "conv1_2"
+  type: "Convolution"
+  bottom: "conv1_1"
+  top: "conv1_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu1_2"
+  type: "ReLU"
+  bottom: "conv1_2"
+  top: "conv1_2"
+}
+layer {
+  name: "pool1_stage1"
+  type: "Pooling"
+  bottom: "conv1_2"
+  top: "pool1_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv2_1"
+  type: "Convolution"
+  bottom: "pool1_stage1"
+  top: "conv2_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu2_1"
+  type: "ReLU"
+  bottom: "conv2_1"
+  top: "conv2_1"
+}
+layer {
+  name: "conv2_2"
+  type: "Convolution"
+  bottom: "conv2_1"
+  top: "conv2_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu2_2"
+  type: "ReLU"
+  bottom: "conv2_2"
+  top: "conv2_2"
+}
+layer {
+  name: "pool2_stage1"
+  type: "Pooling"
+  bottom: "conv2_2"
+  top: "pool2_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv3_1"
+  type: "Convolution"
+  bottom: "pool2_stage1"
+  top: "conv3_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_1"
+  type: "ReLU"
+  bottom: "conv3_1"
+  top: "conv3_1"
+}
+layer {
+  name: "conv3_2"
+  type: "Convolution"
+  bottom: "conv3_1"
+  top: "conv3_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_2"
+  type: "ReLU"
+  bottom: "conv3_2"
+  top: "conv3_2"
+}
+layer {
+  name: "conv3_3"
+  type: "Convolution"
+  bottom: "conv3_2"
+  top: "conv3_3"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_3"
+  type: "ReLU"
+  bottom: "conv3_3"
+  top: "conv3_3"
+}
+layer {
+  name: "conv3_4"
+  type: "Convolution"
+  bottom: "conv3_3"
+  top: "conv3_4"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_4"
+  type: "ReLU"
+  bottom: "conv3_4"
+  top: "conv3_4"
+}
+layer {
+  name: "pool3_stage1"
+  type: "Pooling"
+  bottom: "conv3_4"
+  top: "pool3_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv4_1"
+  type: "Convolution"
+  bottom: "pool3_stage1"
+  top: "conv4_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_1"
+  type: "ReLU"
+  bottom: "conv4_1"
+  top: "conv4_1"
+}
+layer {
+  name: "conv4_2"
+  type: "Convolution"
+  bottom: "conv4_1"
+  top: "conv4_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_2"
+  type: "ReLU"
+  bottom: "conv4_2"
+  top: "conv4_2"
+}
+layer {
+  name: "conv4_3_CPM"
+  type: "Convolution"
+  bottom: "conv4_2"
+  top: "conv4_3_CPM"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_3_CPM"
+  type: "ReLU"
+  bottom: "conv4_3_CPM"
+  top: "conv4_3_CPM"
+}
+layer {
+  name: "conv4_4_CPM"
+  type: "Convolution"
+  bottom: "conv4_3_CPM"
+  top: "conv4_4_CPM"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_4_CPM"
+  type: "ReLU"
+  bottom: "conv4_4_CPM"
+  top: "conv4_4_CPM"
+}
+layer {
+  name: "conv5_1_CPM_L1"
+  type: "Convolution"
+  bottom: "conv4_4_CPM"
+  top: "conv5_1_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_1_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_1_CPM_L1"
+  top: "conv5_1_CPM_L1"
+}
+layer {
+  name: "conv5_1_CPM_L2"
+  type: "Convolution"
+  bottom: "conv4_4_CPM"
+  top: "conv5_1_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_1_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_1_CPM_L2"
+  top: "conv5_1_CPM_L2"
+}
+layer {
+  name: "conv5_2_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_1_CPM_L1"
+  top: "conv5_2_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_2_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_2_CPM_L1"
+  top: "conv5_2_CPM_L1"
+}
+layer {
+  name: "conv5_2_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_1_CPM_L2"
+  top: "conv5_2_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_2_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_2_CPM_L2"
+  top: "conv5_2_CPM_L2"
+}
+layer {
+  name: "conv5_3_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_2_CPM_L1"
+  top: "conv5_3_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_3_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_3_CPM_L1"
+  top: "conv5_3_CPM_L1"
+}
+layer {
+  name: "conv5_3_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_2_CPM_L2"
+  top: "conv5_3_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_3_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_3_CPM_L2"
+  top: "conv5_3_CPM_L2"
+}
+layer {
+  name: "conv5_4_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_3_CPM_L1"
+  top: "conv5_4_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_4_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_4_CPM_L1"
+  top: "conv5_4_CPM_L1"
+}
+layer {
+  name: "conv5_4_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_3_CPM_L2"
+  top: "conv5_4_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_4_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_4_CPM_L2"
+  top: "conv5_4_CPM_L2"
+}
+layer {
+  name: "conv5_5_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_4_CPM_L1"
+  top: "conv5_5_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "conv5_5_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_4_CPM_L2"
+  top: "conv5_5_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage2"
+  type: "Concat"
+  bottom: "conv5_5_CPM_L1"
+  bottom: "conv5_5_CPM_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage2"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage2_L1"
+  type: "Convolution"
+  bottom: "concat_stage2"
+  top: "Mconv1_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage2_L1"
+  top: "Mconv1_stage2_L1"
+}
+layer {
+  name: "Mconv1_stage2_L2"
+  type: "Convolution"
+  bottom: "concat_stage2"
+  top: "Mconv1_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage2_L2"
+  top: "Mconv1_stage2_L2"
+}
+layer {
+  name: "Mconv2_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage2_L1"
+  top: "Mconv2_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage2_L1"
+  top: "Mconv2_stage2_L1"
+}
+layer {
+  name: "Mconv2_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage2_L2"
+  top: "Mconv2_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage2_L2"
+  top: "Mconv2_stage2_L2"
+}
+layer {
+  name: "Mconv3_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage2_L1"
+  top: "Mconv3_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage2_L1"
+  top: "Mconv3_stage2_L1"
+}
+layer {
+  name: "Mconv3_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage2_L2"
+  top: "Mconv3_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage2_L2"
+  top: "Mconv3_stage2_L2"
+}
+layer {
+  name: "Mconv4_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage2_L1"
+  top: "Mconv4_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage2_L1"
+  top: "Mconv4_stage2_L1"
+}
+layer {
+  name: "Mconv4_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage2_L2"
+  top: "Mconv4_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage2_L2"
+  top: "Mconv4_stage2_L2"
+}
+layer {
+  name: "Mconv5_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage2_L1"
+  top: "Mconv5_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage2_L1"
+  top: "Mconv5_stage2_L1"
+}
+layer {
+  name: "Mconv5_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage2_L2"
+  top: "Mconv5_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage2_L2"
+  top: "Mconv5_stage2_L2"
+}
+layer {
+  name: "Mconv6_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage2_L1"
+  top: "Mconv6_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage2_L1"
+  top: "Mconv6_stage2_L1"
+}
+layer {
+  name: "Mconv6_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage2_L2"
+  top: "Mconv6_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage2_L2"
+  top: "Mconv6_stage2_L2"
+}
+layer {
+  name: "Mconv7_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage2_L1"
+  top: "Mconv7_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage2_L2"
+  top: "Mconv7_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage3"
+  type: "Concat"
+  bottom: "Mconv7_stage2_L1"
+  bottom: "Mconv7_stage2_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage3"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage3_L1"
+  type: "Convolution"
+  bottom: "concat_stage3"
+  top: "Mconv1_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage3_L1"
+  top: "Mconv1_stage3_L1"
+}
+layer {
+  name: "Mconv1_stage3_L2"
+  type: "Convolution"
+  bottom: "concat_stage3"
+  top: "Mconv1_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage3_L2"
+  top: "Mconv1_stage3_L2"
+}
+layer {
+  name: "Mconv2_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage3_L1"
+  top: "Mconv2_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage3_L1"
+  top: "Mconv2_stage3_L1"
+}
+layer {
+  name: "Mconv2_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage3_L2"
+  top: "Mconv2_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage3_L2"
+  top: "Mconv2_stage3_L2"
+}
+layer {
+  name: "Mconv3_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage3_L1"
+  top: "Mconv3_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage3_L1"
+  top: "Mconv3_stage3_L1"
+}
+layer {
+  name: "Mconv3_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage3_L2"
+  top: "Mconv3_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage3_L2"
+  top: "Mconv3_stage3_L2"
+}
+layer {
+  name: "Mconv4_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage3_L1"
+  top: "Mconv4_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage3_L1"
+  top: "Mconv4_stage3_L1"
+}
+layer {
+  name: "Mconv4_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage3_L2"
+  top: "Mconv4_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage3_L2"
+  top: "Mconv4_stage3_L2"
+}
+layer {
+  name: "Mconv5_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage3_L1"
+  top: "Mconv5_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage3_L1"
+  top: "Mconv5_stage3_L1"
+}
+layer {
+  name: "Mconv5_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage3_L2"
+  top: "Mconv5_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage3_L2"
+  top: "Mconv5_stage3_L2"
+}
+layer {
+  name: "Mconv6_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage3_L1"
+  top: "Mconv6_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage3_L1"
+  top: "Mconv6_stage3_L1"
+}
+layer {
+  name: "Mconv6_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage3_L2"
+  top: "Mconv6_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage3_L2"
+  top: "Mconv6_stage3_L2"
+}
+layer {
+  name: "Mconv7_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage3_L1"
+  top: "Mconv7_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage3_L2"
+  top: "Mconv7_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage4"
+  type: "Concat"
+  bottom: "Mconv7_stage3_L1"
+  bottom: "Mconv7_stage3_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage4"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage4_L1"
+  type: "Convolution"
+  bottom: "concat_stage4"
+  top: "Mconv1_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage4_L1"
+  top: "Mconv1_stage4_L1"
+}
+layer {
+  name: "Mconv1_stage4_L2"
+  type: "Convolution"
+  bottom: "concat_stage4"
+  top: "Mconv1_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage4_L2"
+  top: "Mconv1_stage4_L2"
+}
+layer {
+  name: "Mconv2_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage4_L1"
+  top: "Mconv2_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage4_L1"
+  top: "Mconv2_stage4_L1"
+}
+layer {
+  name: "Mconv2_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage4_L2"
+  top: "Mconv2_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage4_L2"
+  top: "Mconv2_stage4_L2"
+}
+layer {
+  name: "Mconv3_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage4_L1"
+  top: "Mconv3_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage4_L1"
+  top: "Mconv3_stage4_L1"
+}
+layer {
+  name: "Mconv3_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage4_L2"
+  top: "Mconv3_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage4_L2"
+  top: "Mconv3_stage4_L2"
+}
+layer {
+  name: "Mconv4_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage4_L1"
+  top: "Mconv4_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage4_L1"
+  top: "Mconv4_stage4_L1"
+}
+layer {
+  name: "Mconv4_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage4_L2"
+  top: "Mconv4_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage4_L2"
+  top: "Mconv4_stage4_L2"
+}
+layer {
+  name: "Mconv5_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage4_L1"
+  top: "Mconv5_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage4_L1"
+  top: "Mconv5_stage4_L1"
+}
+layer {
+  name: "Mconv5_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage4_L2"
+  top: "Mconv5_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage4_L2"
+  top: "Mconv5_stage4_L2"
+}
+layer {
+  name: "Mconv6_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage4_L1"
+  top: "Mconv6_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage4_L1"
+  top: "Mconv6_stage4_L1"
+}
+layer {
+  name: "Mconv6_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage4_L2"
+  top: "Mconv6_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage4_L2"
+  top: "Mconv6_stage4_L2"
+}
+layer {
+  name: "Mconv7_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage4_L1"
+  top: "Mconv7_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage4_L2"
+  top: "Mconv7_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage5"
+  type: "Concat"
+  bottom: "Mconv7_stage4_L1"
+  bottom: "Mconv7_stage4_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage5"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage5_L1"
+  type: "Convolution"
+  bottom: "concat_stage5"
+  top: "Mconv1_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage5_L1"
+  top: "Mconv1_stage5_L1"
+}
+layer {
+  name: "Mconv1_stage5_L2"
+  type: "Convolution"
+  bottom: "concat_stage5"
+  top: "Mconv1_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage5_L2"
+  top: "Mconv1_stage5_L2"
+}
+layer {
+  name: "Mconv2_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage5_L1"
+  top: "Mconv2_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage5_L1"
+  top: "Mconv2_stage5_L1"
+}
+layer {
+  name: "Mconv2_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage5_L2"
+  top: "Mconv2_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage5_L2"
+  top: "Mconv2_stage5_L2"
+}
+layer {
+  name: "Mconv3_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage5_L1"
+  top: "Mconv3_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage5_L1"
+  top: "Mconv3_stage5_L1"
+}
+layer {
+  name: "Mconv3_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage5_L2"
+  top: "Mconv3_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage5_L2"
+  top: "Mconv3_stage5_L2"
+}
+layer {
+  name: "Mconv4_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage5_L1"
+  top: "Mconv4_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage5_L1"
+  top: "Mconv4_stage5_L1"
+}
+layer {
+  name: "Mconv4_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage5_L2"
+  top: "Mconv4_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage5_L2"
+  top: "Mconv4_stage5_L2"
+}
+layer {
+  name: "Mconv5_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage5_L1"
+  top: "Mconv5_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage5_L1"
+  top: "Mconv5_stage5_L1"
+}
+layer {
+  name: "Mconv5_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage5_L2"
+  top: "Mconv5_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage5_L2"
+  top: "Mconv5_stage5_L2"
+}
+layer {
+  name: "Mconv6_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage5_L1"
+  top: "Mconv6_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage5_L1"
+  top: "Mconv6_stage5_L1"
+}
+layer {
+  name: "Mconv6_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage5_L2"
+  top: "Mconv6_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage5_L2"
+  top: "Mconv6_stage5_L2"
+}
+layer {
+  name: "Mconv7_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage5_L1"
+  top: "Mconv7_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage5_L2"
+  top: "Mconv7_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage6"
+  type: "Concat"
+  bottom: "Mconv7_stage5_L1"
+  bottom: "Mconv7_stage5_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage6"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage6_L1"
+  type: "Convolution"
+  bottom: "concat_stage6"
+  top: "Mconv1_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage6_L1"
+  top: "Mconv1_stage6_L1"
+}
+layer {
+  name: "Mconv1_stage6_L2"
+  type: "Convolution"
+  bottom: "concat_stage6"
+  top: "Mconv1_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage6_L2"
+  top: "Mconv1_stage6_L2"
+}
+layer {
+  name: "Mconv2_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage6_L1"
+  top: "Mconv2_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage6_L1"
+  top: "Mconv2_stage6_L1"
+}
+layer {
+  name: "Mconv2_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage6_L2"
+  top: "Mconv2_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage6_L2"
+  top: "Mconv2_stage6_L2"
+}
+layer {
+  name: "Mconv3_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage6_L1"
+  top: "Mconv3_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage6_L1"
+  top: "Mconv3_stage6_L1"
+}
+layer {
+  name: "Mconv3_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage6_L2"
+  top: "Mconv3_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage6_L2"
+  top: "Mconv3_stage6_L2"
+}
+layer {
+  name: "Mconv4_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage6_L1"
+  top: "Mconv4_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage6_L1"
+  top: "Mconv4_stage6_L1"
+}
+layer {
+  name: "Mconv4_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage6_L2"
+  top: "Mconv4_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage6_L2"
+  top: "Mconv4_stage6_L2"
+}
+layer {
+  name: "Mconv5_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage6_L1"
+  top: "Mconv5_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage6_L1"
+  top: "Mconv5_stage6_L1"
+}
+layer {
+  name: "Mconv5_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage6_L2"
+  top: "Mconv5_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage6_L2"
+  top: "Mconv5_stage6_L2"
+}
+layer {
+  name: "Mconv6_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage6_L1"
+  top: "Mconv6_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage6_L1"
+  top: "Mconv6_stage6_L1"
+}
+layer {
+  name: "Mconv6_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage6_L2"
+  top: "Mconv6_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage6_L2"
+  top: "Mconv6_stage6_L2"
+}
+layer {
+  name: "Mconv7_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage6_L1"
+  top: "Mconv7_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage6_L2"
+  top: "Mconv7_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage7"
+  type: "Concat"
+  bottom: "Mconv7_stage6_L2"
+  bottom: "Mconv7_stage6_L1"
+  # Disabled alternative top name "concat_stage7"; the blob is exposed as "net_output" instead.
+  top: "net_output"
+  concat_param {
+    axis: 1
+  }
+}
diff --git a/models/pose/coco/pose_deploy_linevec.prototxt_368x656 b/models/pose/coco/pose_deploy_linevec.prototxt_368x656
new file mode 100755
index 000000000..c310c8785
--- /dev/null
+++ b/models/pose/coco/pose_deploy_linevec.prototxt_368x656
@@ -0,0 +1,2976 @@
+input: "image"
+input_dim: 1
+input_dim: 3
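+input_dim: 368 # Input height, fixed to match the 368x656 suffix of this file (a loader may still reshape it at runtime - TODO confirm)
+input_dim: 656 # Input width, fixed to match the 368x656 suffix of this file (a loader may still reshape it at runtime - TODO confirm)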
+layer {
+  name: "conv1_1"
+  type: "Convolution"
+  bottom: "image"
+  top: "conv1_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu1_1"
+  type: "ReLU"
+  bottom: "conv1_1"
+  top: "conv1_1"
+}
+layer {
+  name: "conv1_2"
+  type: "Convolution"
+  bottom: "conv1_1"
+  top: "conv1_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu1_2"
+  type: "ReLU"
+  bottom: "conv1_2"
+  top: "conv1_2"
+}
+layer {
+  name: "pool1_stage1"
+  type: "Pooling"
+  bottom: "conv1_2"
+  top: "pool1_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv2_1"
+  type: "Convolution"
+  bottom: "pool1_stage1"
+  top: "conv2_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu2_1"
+  type: "ReLU"
+  bottom: "conv2_1"
+  top: "conv2_1"
+}
+layer {
+  name: "conv2_2"
+  type: "Convolution"
+  bottom: "conv2_1"
+  top: "conv2_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu2_2"
+  type: "ReLU"
+  bottom: "conv2_2"
+  top: "conv2_2"
+}
+layer {
+  name: "pool2_stage1"
+  type: "Pooling"
+  bottom: "conv2_2"
+  top: "pool2_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv3_1"
+  type: "Convolution"
+  bottom: "pool2_stage1"
+  top: "conv3_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_1"
+  type: "ReLU"
+  bottom: "conv3_1"
+  top: "conv3_1"
+}
+layer {
+  name: "conv3_2"
+  type: "Convolution"
+  bottom: "conv3_1"
+  top: "conv3_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_2"
+  type: "ReLU"
+  bottom: "conv3_2"
+  top: "conv3_2"
+}
+layer {
+  name: "conv3_3"
+  type: "Convolution"
+  bottom: "conv3_2"
+  top: "conv3_3"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_3"
+  type: "ReLU"
+  bottom: "conv3_3"
+  top: "conv3_3"
+}
+layer {
+  name: "conv3_4"
+  type: "Convolution"
+  bottom: "conv3_3"
+  top: "conv3_4"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_4"
+  type: "ReLU"
+  bottom: "conv3_4"
+  top: "conv3_4"
+}
+layer {
+  name: "pool3_stage1"
+  type: "Pooling"
+  bottom: "conv3_4"
+  top: "pool3_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv4_1"
+  type: "Convolution"
+  bottom: "pool3_stage1"
+  top: "conv4_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_1"
+  type: "ReLU"
+  bottom: "conv4_1"
+  top: "conv4_1"
+}
+layer {
+  name: "conv4_2"
+  type: "Convolution"
+  bottom: "conv4_1"
+  top: "conv4_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_2"
+  type: "ReLU"
+  bottom: "conv4_2"
+  top: "conv4_2"
+}
+layer {
+  name: "conv4_3_CPM"
+  type: "Convolution"
+  bottom: "conv4_2"
+  top: "conv4_3_CPM"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_3_CPM"
+  type: "ReLU"
+  bottom: "conv4_3_CPM"
+  top: "conv4_3_CPM"
+}
+layer {
+  name: "conv4_4_CPM"
+  type: "Convolution"
+  bottom: "conv4_3_CPM"
+  top: "conv4_4_CPM"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_4_CPM"
+  type: "ReLU"
+  bottom: "conv4_4_CPM"
+  top: "conv4_4_CPM"
+}
+layer {
+  name: "conv5_1_CPM_L1"
+  type: "Convolution"
+  bottom: "conv4_4_CPM"
+  top: "conv5_1_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_1_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_1_CPM_L1"
+  top: "conv5_1_CPM_L1"
+}
+layer {
+  name: "conv5_1_CPM_L2"
+  type: "Convolution"
+  bottom: "conv4_4_CPM"
+  top: "conv5_1_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_1_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_1_CPM_L2"
+  top: "conv5_1_CPM_L2"
+}
+layer {
+  name: "conv5_2_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_1_CPM_L1"
+  top: "conv5_2_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_2_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_2_CPM_L1"
+  top: "conv5_2_CPM_L1"
+}
+layer {
+  name: "conv5_2_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_1_CPM_L2"
+  top: "conv5_2_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_2_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_2_CPM_L2"
+  top: "conv5_2_CPM_L2"
+}
+layer {
+  name: "conv5_3_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_2_CPM_L1"
+  top: "conv5_3_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_3_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_3_CPM_L1"
+  top: "conv5_3_CPM_L1"
+}
+layer {
+  name: "conv5_3_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_2_CPM_L2"
+  top: "conv5_3_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_3_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_3_CPM_L2"
+  top: "conv5_3_CPM_L2"
+}
+layer {
+  name: "conv5_4_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_3_CPM_L1"
+  top: "conv5_4_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_4_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_4_CPM_L1"
+  top: "conv5_4_CPM_L1"
+}
+layer {
+  name: "conv5_4_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_3_CPM_L2"
+  top: "conv5_4_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_4_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_4_CPM_L2"
+  top: "conv5_4_CPM_L2"
+}
+layer {
+  name: "conv5_5_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_4_CPM_L1"
+  top: "conv5_5_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "conv5_5_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_4_CPM_L2"
+  top: "conv5_5_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage2"
+  type: "Concat"
+  bottom: "conv5_5_CPM_L1"
+  bottom: "conv5_5_CPM_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage2"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage2_L1"
+  type: "Convolution"
+  bottom: "concat_stage2"
+  top: "Mconv1_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage2_L1"
+  top: "Mconv1_stage2_L1"
+}
+layer {
+  name: "Mconv1_stage2_L2"
+  type: "Convolution"
+  bottom: "concat_stage2"
+  top: "Mconv1_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage2_L2"
+  top: "Mconv1_stage2_L2"
+}
+layer {
+  name: "Mconv2_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage2_L1"
+  top: "Mconv2_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage2_L1"
+  top: "Mconv2_stage2_L1"
+}
+layer {
+  name: "Mconv2_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage2_L2"
+  top: "Mconv2_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage2_L2"
+  top: "Mconv2_stage2_L2"
+}
+layer {
+  name: "Mconv3_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage2_L1"
+  top: "Mconv3_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage2_L1"
+  top: "Mconv3_stage2_L1"
+}
+layer {
+  name: "Mconv3_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage2_L2"
+  top: "Mconv3_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage2_L2"
+  top: "Mconv3_stage2_L2"
+}
+layer {
+  name: "Mconv4_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage2_L1"
+  top: "Mconv4_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage2_L1"
+  top: "Mconv4_stage2_L1"
+}
+layer {
+  name: "Mconv4_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage2_L2"
+  top: "Mconv4_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage2_L2"
+  top: "Mconv4_stage2_L2"
+}
+layer {
+  name: "Mconv5_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage2_L1"
+  top: "Mconv5_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage2_L1"
+  top: "Mconv5_stage2_L1"
+}
+layer {
+  name: "Mconv5_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage2_L2"
+  top: "Mconv5_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage2_L2"
+  top: "Mconv5_stage2_L2"
+}
+layer {
+  name: "Mconv6_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage2_L1"
+  top: "Mconv6_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage2_L1"
+  top: "Mconv6_stage2_L1"
+}
+layer {
+  name: "Mconv6_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage2_L2"
+  top: "Mconv6_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage2_L2"
+  top: "Mconv6_stage2_L2"
+}
+layer {
+  name: "Mconv7_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage2_L1"
+  top: "Mconv7_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage2_L2"
+  top: "Mconv7_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage3"
+  type: "Concat"
+  bottom: "Mconv7_stage2_L1"
+  bottom: "Mconv7_stage2_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage3"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage3_L1"
+  type: "Convolution"
+  bottom: "concat_stage3"
+  top: "Mconv1_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage3_L1"
+  top: "Mconv1_stage3_L1"
+}
+layer {
+  name: "Mconv1_stage3_L2"
+  type: "Convolution"
+  bottom: "concat_stage3"
+  top: "Mconv1_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage3_L2"
+  top: "Mconv1_stage3_L2"
+}
+layer {
+  name: "Mconv2_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage3_L1"
+  top: "Mconv2_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage3_L1"
+  top: "Mconv2_stage3_L1"
+}
+layer {
+  name: "Mconv2_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage3_L2"
+  top: "Mconv2_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage3_L2"
+  top: "Mconv2_stage3_L2"
+}
+layer {
+  name: "Mconv3_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage3_L1"
+  top: "Mconv3_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage3_L1"
+  top: "Mconv3_stage3_L1"
+}
+layer {
+  name: "Mconv3_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage3_L2"
+  top: "Mconv3_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage3_L2"
+  top: "Mconv3_stage3_L2"
+}
+layer {
+  name: "Mconv4_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage3_L1"
+  top: "Mconv4_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage3_L1"
+  top: "Mconv4_stage3_L1"
+}
+layer {
+  name: "Mconv4_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage3_L2"
+  top: "Mconv4_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage3_L2"
+  top: "Mconv4_stage3_L2"
+}
+layer {
+  name: "Mconv5_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage3_L1"
+  top: "Mconv5_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage3_L1"
+  top: "Mconv5_stage3_L1"
+}
+layer {
+  name: "Mconv5_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage3_L2"
+  top: "Mconv5_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage3_L2"
+  top: "Mconv5_stage3_L2"
+}
+layer {
+  name: "Mconv6_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage3_L1"
+  top: "Mconv6_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage3_L1"
+  top: "Mconv6_stage3_L1"
+}
+layer {
+  name: "Mconv6_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage3_L2"
+  top: "Mconv6_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage3_L2"
+  top: "Mconv6_stage3_L2"
+}
+layer {
+  name: "Mconv7_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage3_L1"
+  top: "Mconv7_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage3_L2"
+  top: "Mconv7_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage4"
+  type: "Concat"
+  bottom: "Mconv7_stage3_L1"
+  bottom: "Mconv7_stage3_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage4"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage4_L1"
+  type: "Convolution"
+  bottom: "concat_stage4"
+  top: "Mconv1_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage4_L1"
+  top: "Mconv1_stage4_L1"
+}
+layer {
+  name: "Mconv1_stage4_L2"
+  type: "Convolution"
+  bottom: "concat_stage4"
+  top: "Mconv1_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage4_L2"
+  top: "Mconv1_stage4_L2"
+}
+layer {
+  name: "Mconv2_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage4_L1"
+  top: "Mconv2_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage4_L1"
+  top: "Mconv2_stage4_L1"
+}
+layer {
+  name: "Mconv2_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage4_L2"
+  top: "Mconv2_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage4_L2"
+  top: "Mconv2_stage4_L2"
+}
+layer {
+  name: "Mconv3_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage4_L1"
+  top: "Mconv3_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage4_L1"
+  top: "Mconv3_stage4_L1"
+}
+layer {
+  name: "Mconv3_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage4_L2"
+  top: "Mconv3_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage4_L2"
+  top: "Mconv3_stage4_L2"
+}
+layer {
+  name: "Mconv4_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage4_L1"
+  top: "Mconv4_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage4_L1"
+  top: "Mconv4_stage4_L1"
+}
+layer {
+  name: "Mconv4_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage4_L2"
+  top: "Mconv4_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage4_L2"
+  top: "Mconv4_stage4_L2"
+}
+layer {
+  name: "Mconv5_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage4_L1"
+  top: "Mconv5_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage4_L1"
+  top: "Mconv5_stage4_L1"
+}
+layer {
+  name: "Mconv5_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage4_L2"
+  top: "Mconv5_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage4_L2"
+  top: "Mconv5_stage4_L2"
+}
+layer {
+  name: "Mconv6_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage4_L1"
+  top: "Mconv6_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage4_L1"
+  top: "Mconv6_stage4_L1"
+}
+layer {
+  name: "Mconv6_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage4_L2"
+  top: "Mconv6_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage4_L2"
+  top: "Mconv6_stage4_L2"
+}
+layer {
+  name: "Mconv7_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage4_L1"
+  top: "Mconv7_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage4_L2"
+  top: "Mconv7_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage5"
+  type: "Concat"
+  bottom: "Mconv7_stage4_L1"
+  bottom: "Mconv7_stage4_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage5"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage5_L1"
+  type: "Convolution"
+  bottom: "concat_stage5"
+  top: "Mconv1_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage5_L1"
+  top: "Mconv1_stage5_L1"
+}
+layer {
+  name: "Mconv1_stage5_L2"
+  type: "Convolution"
+  bottom: "concat_stage5"
+  top: "Mconv1_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage5_L2"
+  top: "Mconv1_stage5_L2"
+}
+layer {
+  name: "Mconv2_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage5_L1"
+  top: "Mconv2_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage5_L1"
+  top: "Mconv2_stage5_L1"
+}
+layer {
+  name: "Mconv2_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage5_L2"
+  top: "Mconv2_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage5_L2"
+  top: "Mconv2_stage5_L2"
+}
+layer {
+  name: "Mconv3_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage5_L1"
+  top: "Mconv3_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage5_L1"
+  top: "Mconv3_stage5_L1"
+}
+layer {
+  name: "Mconv3_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage5_L2"
+  top: "Mconv3_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage5_L2"
+  top: "Mconv3_stage5_L2"
+}
+layer {
+  name: "Mconv4_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage5_L1"
+  top: "Mconv4_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage5_L1"
+  top: "Mconv4_stage5_L1"
+}
+layer {
+  name: "Mconv4_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage5_L2"
+  top: "Mconv4_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage5_L2"
+  top: "Mconv4_stage5_L2"
+}
+layer {
+  name: "Mconv5_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage5_L1"
+  top: "Mconv5_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage5_L1"
+  top: "Mconv5_stage5_L1"
+}
+layer {
+  name: "Mconv5_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage5_L2"
+  top: "Mconv5_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage5_L2"
+  top: "Mconv5_stage5_L2"
+}
+layer {
+  name: "Mconv6_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage5_L1"
+  top: "Mconv6_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage5_L1"
+  top: "Mconv6_stage5_L1"
+}
+layer {
+  name: "Mconv6_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage5_L2"
+  top: "Mconv6_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage5_L2"
+  top: "Mconv6_stage5_L2"
+}
+layer {
+  name: "Mconv7_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage5_L1"
+  top: "Mconv7_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage5_L2"
+  top: "Mconv7_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage6"
+  type: "Concat"
+  bottom: "Mconv7_stage5_L1"
+  bottom: "Mconv7_stage5_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage6"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage6_L1"
+  type: "Convolution"
+  bottom: "concat_stage6"
+  top: "Mconv1_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage6_L1"
+  top: "Mconv1_stage6_L1"
+}
+layer {
+  name: "Mconv1_stage6_L2"
+  type: "Convolution"
+  bottom: "concat_stage6"
+  top: "Mconv1_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage6_L2"
+  top: "Mconv1_stage6_L2"
+}
+layer {
+  name: "Mconv2_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage6_L1"
+  top: "Mconv2_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage6_L1"
+  top: "Mconv2_stage6_L1"
+}
+layer {
+  name: "Mconv2_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage6_L2"
+  top: "Mconv2_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage6_L2"
+  top: "Mconv2_stage6_L2"
+}
+layer {
+  name: "Mconv3_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage6_L1"
+  top: "Mconv3_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage6_L1"
+  top: "Mconv3_stage6_L1"
+}
+layer {
+  name: "Mconv3_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage6_L2"
+  top: "Mconv3_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage6_L2"
+  top: "Mconv3_stage6_L2"
+}
+layer {
+  name: "Mconv4_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage6_L1"
+  top: "Mconv4_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage6_L1"
+  top: "Mconv4_stage6_L1"
+}
+layer {
+  name: "Mconv4_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage6_L2"
+  top: "Mconv4_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage6_L2"
+  top: "Mconv4_stage6_L2"
+}
+layer {
+  name: "Mconv5_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage6_L1"
+  top: "Mconv5_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage6_L1"
+  top: "Mconv5_stage6_L1"
+}
+layer {
+  name: "Mconv5_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage6_L2"
+  top: "Mconv5_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage6_L2"
+  top: "Mconv5_stage6_L2"
+}
+layer {
+  name: "Mconv6_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage6_L1"
+  top: "Mconv6_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage6_L1"
+  top: "Mconv6_stage6_L1"
+}
+layer {
+  name: "Mconv6_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage6_L2"
+  top: "Mconv6_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage6_L2"
+  top: "Mconv6_stage6_L2"
+}
+layer {
+  name: "Mconv7_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage6_L1"
+  top: "Mconv7_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage6_L2"
+  top: "Mconv7_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage7"
+  type: "Concat"
+  bottom: "Mconv7_stage6_L2"
+  bottom: "Mconv7_stage6_L1"
+  # top: "concat_stage7"  (disabled: this layer's output blob is named "net_output" instead)
+  top: "net_output"
+  concat_param {
+    axis: 1
+  }
+}
diff --git a/models/pose/coco/pose_deploy_linevec.prototxt_96x128 b/models/pose/coco/pose_deploy_linevec.prototxt_96x128
new file mode 100755
index 000000000..6e4322812
--- /dev/null
+++ b/models/pose/coco/pose_deploy_linevec.prototxt_96x128
@@ -0,0 +1,2976 @@
+input: "image"
+input_dim: 1
+input_dim: 3
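+input_dim: 96 # Height, fixed at 96 in this 96x128 variant (NOTE(review): confirm nothing reshapes it at runtime)
+input_dim: 128 # Width, fixed at 128 in this 96x128 variant (NOTE(review): confirm nothing reshapes it at runtime)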
+layer {
+  name: "conv1_1"
+  type: "Convolution"
+  bottom: "image"
+  top: "conv1_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu1_1"
+  type: "ReLU"
+  bottom: "conv1_1"
+  top: "conv1_1"
+}
+layer {
+  name: "conv1_2"
+  type: "Convolution"
+  bottom: "conv1_1"
+  top: "conv1_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu1_2"
+  type: "ReLU"
+  bottom: "conv1_2"
+  top: "conv1_2"
+}
+layer {
+  name: "pool1_stage1"
+  type: "Pooling"
+  bottom: "conv1_2"
+  top: "pool1_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv2_1"
+  type: "Convolution"
+  bottom: "pool1_stage1"
+  top: "conv2_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu2_1"
+  type: "ReLU"
+  bottom: "conv2_1"
+  top: "conv2_1"
+}
+layer {
+  name: "conv2_2"
+  type: "Convolution"
+  bottom: "conv2_1"
+  top: "conv2_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu2_2"
+  type: "ReLU"
+  bottom: "conv2_2"
+  top: "conv2_2"
+}
+layer {
+  name: "pool2_stage1"
+  type: "Pooling"
+  bottom: "conv2_2"
+  top: "pool2_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv3_1"
+  type: "Convolution"
+  bottom: "pool2_stage1"
+  top: "conv3_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_1"
+  type: "ReLU"
+  bottom: "conv3_1"
+  top: "conv3_1"
+}
+layer {
+  name: "conv3_2"
+  type: "Convolution"
+  bottom: "conv3_1"
+  top: "conv3_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_2"
+  type: "ReLU"
+  bottom: "conv3_2"
+  top: "conv3_2"
+}
+layer {
+  name: "conv3_3"
+  type: "Convolution"
+  bottom: "conv3_2"
+  top: "conv3_3"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_3"
+  type: "ReLU"
+  bottom: "conv3_3"
+  top: "conv3_3"
+}
+layer {
+  name: "conv3_4"
+  type: "Convolution"
+  bottom: "conv3_3"
+  top: "conv3_4"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu3_4"
+  type: "ReLU"
+  bottom: "conv3_4"
+  top: "conv3_4"
+}
+layer {
+  name: "pool3_stage1"
+  type: "Pooling"
+  bottom: "conv3_4"
+  top: "pool3_stage1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  name: "conv4_1"
+  type: "Convolution"
+  bottom: "pool3_stage1"
+  top: "conv4_1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_1"
+  type: "ReLU"
+  bottom: "conv4_1"
+  top: "conv4_1"
+}
+layer {
+  name: "conv4_2"
+  type: "Convolution"
+  bottom: "conv4_1"
+  top: "conv4_2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_2"
+  type: "ReLU"
+  bottom: "conv4_2"
+  top: "conv4_2"
+}
+layer {
+  name: "conv4_3_CPM"
+  type: "Convolution"
+  bottom: "conv4_2"
+  top: "conv4_3_CPM"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_3_CPM"
+  type: "ReLU"
+  bottom: "conv4_3_CPM"
+  top: "conv4_3_CPM"
+}
+layer {
+  name: "conv4_4_CPM"
+  type: "Convolution"
+  bottom: "conv4_3_CPM"
+  top: "conv4_4_CPM"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu4_4_CPM"
+  type: "ReLU"
+  bottom: "conv4_4_CPM"
+  top: "conv4_4_CPM"
+}
+layer {
+  name: "conv5_1_CPM_L1"
+  type: "Convolution"
+  bottom: "conv4_4_CPM"
+  top: "conv5_1_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_1_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_1_CPM_L1"
+  top: "conv5_1_CPM_L1"
+}
+layer {
+  name: "conv5_1_CPM_L2"
+  type: "Convolution"
+  bottom: "conv4_4_CPM"
+  top: "conv5_1_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_1_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_1_CPM_L2"
+  top: "conv5_1_CPM_L2"
+}
+layer {
+  name: "conv5_2_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_1_CPM_L1"
+  top: "conv5_2_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_2_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_2_CPM_L1"
+  top: "conv5_2_CPM_L1"
+}
+layer {
+  name: "conv5_2_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_1_CPM_L2"
+  top: "conv5_2_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_2_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_2_CPM_L2"
+  top: "conv5_2_CPM_L2"
+}
+layer {
+  name: "conv5_3_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_2_CPM_L1"
+  top: "conv5_3_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_3_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_3_CPM_L1"
+  top: "conv5_3_CPM_L1"
+}
+layer {
+  name: "conv5_3_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_2_CPM_L2"
+  top: "conv5_3_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_3_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_3_CPM_L2"
+  top: "conv5_3_CPM_L2"
+}
+layer {
+  name: "conv5_4_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_3_CPM_L1"
+  top: "conv5_4_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_4_CPM_L1"
+  type: "ReLU"
+  bottom: "conv5_4_CPM_L1"
+  top: "conv5_4_CPM_L1"
+}
+layer {
+  name: "conv5_4_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_3_CPM_L2"
+  top: "conv5_4_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 512
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "relu5_4_CPM_L2"
+  type: "ReLU"
+  bottom: "conv5_4_CPM_L2"
+  top: "conv5_4_CPM_L2"
+}
+layer {
+  name: "conv5_5_CPM_L1"
+  type: "Convolution"
+  bottom: "conv5_4_CPM_L1"
+  top: "conv5_5_CPM_L1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "conv5_5_CPM_L2"
+  type: "Convolution"
+  bottom: "conv5_4_CPM_L2"
+  top: "conv5_5_CPM_L2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage2"
+  type: "Concat"
+  bottom: "conv5_5_CPM_L1"
+  bottom: "conv5_5_CPM_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage2"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage2_L1"
+  type: "Convolution"
+  bottom: "concat_stage2"
+  top: "Mconv1_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage2_L1"
+  top: "Mconv1_stage2_L1"
+}
+layer {
+  name: "Mconv1_stage2_L2"
+  type: "Convolution"
+  bottom: "concat_stage2"
+  top: "Mconv1_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage2_L2"
+  top: "Mconv1_stage2_L2"
+}
+layer {
+  name: "Mconv2_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage2_L1"
+  top: "Mconv2_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage2_L1"
+  top: "Mconv2_stage2_L1"
+}
+layer {
+  name: "Mconv2_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage2_L2"
+  top: "Mconv2_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage2_L2"
+  top: "Mconv2_stage2_L2"
+}
+layer {
+  name: "Mconv3_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage2_L1"
+  top: "Mconv3_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage2_L1"
+  top: "Mconv3_stage2_L1"
+}
+layer {
+  name: "Mconv3_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage2_L2"
+  top: "Mconv3_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage2_L2"
+  top: "Mconv3_stage2_L2"
+}
+layer {
+  name: "Mconv4_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage2_L1"
+  top: "Mconv4_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage2_L1"
+  top: "Mconv4_stage2_L1"
+}
+layer {
+  name: "Mconv4_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage2_L2"
+  top: "Mconv4_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage2_L2"
+  top: "Mconv4_stage2_L2"
+}
+layer {
+  name: "Mconv5_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage2_L1"
+  top: "Mconv5_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage2_L1"
+  top: "Mconv5_stage2_L1"
+}
+layer {
+  name: "Mconv5_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage2_L2"
+  top: "Mconv5_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage2_L2"
+  top: "Mconv5_stage2_L2"
+}
+layer {
+  name: "Mconv6_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage2_L1"
+  top: "Mconv6_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage2_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage2_L1"
+  top: "Mconv6_stage2_L1"
+}
+layer {
+  name: "Mconv6_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage2_L2"
+  top: "Mconv6_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage2_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage2_L2"
+  top: "Mconv6_stage2_L2"
+}
+layer {
+  name: "Mconv7_stage2_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage2_L1"
+  top: "Mconv7_stage2_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage2_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage2_L2"
+  top: "Mconv7_stage2_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage3"
+  type: "Concat"
+  bottom: "Mconv7_stage2_L1"
+  bottom: "Mconv7_stage2_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage3"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage3_L1"
+  type: "Convolution"
+  bottom: "concat_stage3"
+  top: "Mconv1_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage3_L1"
+  top: "Mconv1_stage3_L1"
+}
+layer {
+  name: "Mconv1_stage3_L2"
+  type: "Convolution"
+  bottom: "concat_stage3"
+  top: "Mconv1_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage3_L2"
+  top: "Mconv1_stage3_L2"
+}
+layer {
+  name: "Mconv2_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage3_L1"
+  top: "Mconv2_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage3_L1"
+  top: "Mconv2_stage3_L1"
+}
+layer {
+  name: "Mconv2_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage3_L2"
+  top: "Mconv2_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage3_L2"
+  top: "Mconv2_stage3_L2"
+}
+layer {
+  name: "Mconv3_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage3_L1"
+  top: "Mconv3_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage3_L1"
+  top: "Mconv3_stage3_L1"
+}
+layer {
+  name: "Mconv3_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage3_L2"
+  top: "Mconv3_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage3_L2"
+  top: "Mconv3_stage3_L2"
+}
+layer {
+  name: "Mconv4_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage3_L1"
+  top: "Mconv4_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage3_L1"
+  top: "Mconv4_stage3_L1"
+}
+layer {
+  name: "Mconv4_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage3_L2"
+  top: "Mconv4_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage3_L2"
+  top: "Mconv4_stage3_L2"
+}
+layer {
+  name: "Mconv5_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage3_L1"
+  top: "Mconv5_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage3_L1"
+  top: "Mconv5_stage3_L1"
+}
+layer {
+  name: "Mconv5_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage3_L2"
+  top: "Mconv5_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage3_L2"
+  top: "Mconv5_stage3_L2"
+}
+layer {
+  name: "Mconv6_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage3_L1"
+  top: "Mconv6_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage3_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage3_L1"
+  top: "Mconv6_stage3_L1"
+}
+layer {
+  name: "Mconv6_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage3_L2"
+  top: "Mconv6_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage3_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage3_L2"
+  top: "Mconv6_stage3_L2"
+}
+layer {
+  name: "Mconv7_stage3_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage3_L1"
+  top: "Mconv7_stage3_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage3_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage3_L2"
+  top: "Mconv7_stage3_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage4"
+  type: "Concat"
+  bottom: "Mconv7_stage3_L1"
+  bottom: "Mconv7_stage3_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage4"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage4_L1"
+  type: "Convolution"
+  bottom: "concat_stage4"
+  top: "Mconv1_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage4_L1"
+  top: "Mconv1_stage4_L1"
+}
+layer {
+  name: "Mconv1_stage4_L2"
+  type: "Convolution"
+  bottom: "concat_stage4"
+  top: "Mconv1_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage4_L2"
+  top: "Mconv1_stage4_L2"
+}
+layer {
+  name: "Mconv2_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage4_L1"
+  top: "Mconv2_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage4_L1"
+  top: "Mconv2_stage4_L1"
+}
+layer {
+  name: "Mconv2_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage4_L2"
+  top: "Mconv2_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage4_L2"
+  top: "Mconv2_stage4_L2"
+}
+layer {
+  name: "Mconv3_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage4_L1"
+  top: "Mconv3_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage4_L1"
+  top: "Mconv3_stage4_L1"
+}
+layer {
+  name: "Mconv3_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage4_L2"
+  top: "Mconv3_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage4_L2"
+  top: "Mconv3_stage4_L2"
+}
+layer {
+  name: "Mconv4_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage4_L1"
+  top: "Mconv4_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage4_L1"
+  top: "Mconv4_stage4_L1"
+}
+layer {
+  name: "Mconv4_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage4_L2"
+  top: "Mconv4_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage4_L2"
+  top: "Mconv4_stage4_L2"
+}
+layer {
+  name: "Mconv5_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage4_L1"
+  top: "Mconv5_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage4_L1"
+  top: "Mconv5_stage4_L1"
+}
+layer {
+  name: "Mconv5_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage4_L2"
+  top: "Mconv5_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage4_L2"
+  top: "Mconv5_stage4_L2"
+}
+layer {
+  name: "Mconv6_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage4_L1"
+  top: "Mconv6_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage4_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage4_L1"
+  top: "Mconv6_stage4_L1"
+}
+layer {
+  name: "Mconv6_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage4_L2"
+  top: "Mconv6_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage4_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage4_L2"
+  top: "Mconv6_stage4_L2"
+}
+layer {
+  name: "Mconv7_stage4_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage4_L1"
+  top: "Mconv7_stage4_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage4_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage4_L2"
+  top: "Mconv7_stage4_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage5"
+  type: "Concat"
+  bottom: "Mconv7_stage4_L1"
+  bottom: "Mconv7_stage4_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage5"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage5_L1"
+  type: "Convolution"
+  bottom: "concat_stage5"
+  top: "Mconv1_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage5_L1"
+  top: "Mconv1_stage5_L1"
+}
+layer {
+  name: "Mconv1_stage5_L2"
+  type: "Convolution"
+  bottom: "concat_stage5"
+  top: "Mconv1_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage5_L2"
+  top: "Mconv1_stage5_L2"
+}
+layer {
+  name: "Mconv2_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage5_L1"
+  top: "Mconv2_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage5_L1"
+  top: "Mconv2_stage5_L1"
+}
+layer {
+  name: "Mconv2_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage5_L2"
+  top: "Mconv2_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage5_L2"
+  top: "Mconv2_stage5_L2"
+}
+layer {
+  name: "Mconv3_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage5_L1"
+  top: "Mconv3_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage5_L1"
+  top: "Mconv3_stage5_L1"
+}
+layer {
+  name: "Mconv3_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage5_L2"
+  top: "Mconv3_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage5_L2"
+  top: "Mconv3_stage5_L2"
+}
+layer {
+  name: "Mconv4_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage5_L1"
+  top: "Mconv4_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage5_L1"
+  top: "Mconv4_stage5_L1"
+}
+layer {
+  name: "Mconv4_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage5_L2"
+  top: "Mconv4_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage5_L2"
+  top: "Mconv4_stage5_L2"
+}
+layer {
+  name: "Mconv5_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage5_L1"
+  top: "Mconv5_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage5_L1"
+  top: "Mconv5_stage5_L1"
+}
+layer {
+  name: "Mconv5_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage5_L2"
+  top: "Mconv5_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage5_L2"
+  top: "Mconv5_stage5_L2"
+}
+layer {
+  name: "Mconv6_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage5_L1"
+  top: "Mconv6_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage5_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage5_L1"
+  top: "Mconv6_stage5_L1"
+}
+layer {
+  name: "Mconv6_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage5_L2"
+  top: "Mconv6_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage5_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage5_L2"
+  top: "Mconv6_stage5_L2"
+}
+layer {
+  name: "Mconv7_stage5_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage5_L1"
+  top: "Mconv7_stage5_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage5_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage5_L2"
+  top: "Mconv7_stage5_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage6"
+  type: "Concat"
+  bottom: "Mconv7_stage5_L1"
+  bottom: "Mconv7_stage5_L2"
+  bottom: "conv4_4_CPM"
+  top: "concat_stage6"
+  concat_param {
+    axis: 1
+  }
+}
+layer {
+  name: "Mconv1_stage6_L1"
+  type: "Convolution"
+  bottom: "concat_stage6"
+  top: "Mconv1_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv1_stage6_L1"
+  top: "Mconv1_stage6_L1"
+}
+layer {
+  name: "Mconv1_stage6_L2"
+  type: "Convolution"
+  bottom: "concat_stage6"
+  top: "Mconv1_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu1_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv1_stage6_L2"
+  top: "Mconv1_stage6_L2"
+}
+layer {
+  name: "Mconv2_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv1_stage6_L1"
+  top: "Mconv2_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv2_stage6_L1"
+  top: "Mconv2_stage6_L1"
+}
+layer {
+  name: "Mconv2_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv1_stage6_L2"
+  top: "Mconv2_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu2_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv2_stage6_L2"
+  top: "Mconv2_stage6_L2"
+}
+layer {
+  name: "Mconv3_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv2_stage6_L1"
+  top: "Mconv3_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv3_stage6_L1"
+  top: "Mconv3_stage6_L1"
+}
+layer {
+  name: "Mconv3_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv2_stage6_L2"
+  top: "Mconv3_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu3_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv3_stage6_L2"
+  top: "Mconv3_stage6_L2"
+}
+layer {
+  name: "Mconv4_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv3_stage6_L1"
+  top: "Mconv4_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv4_stage6_L1"
+  top: "Mconv4_stage6_L1"
+}
+layer {
+  name: "Mconv4_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv3_stage6_L2"
+  top: "Mconv4_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu4_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv4_stage6_L2"
+  top: "Mconv4_stage6_L2"
+}
+layer {
+  name: "Mconv5_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv4_stage6_L1"
+  top: "Mconv5_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv5_stage6_L1"
+  top: "Mconv5_stage6_L1"
+}
+layer {
+  name: "Mconv5_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv4_stage6_L2"
+  top: "Mconv5_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 3
+    kernel_size: 7
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu5_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv5_stage6_L2"
+  top: "Mconv5_stage6_L2"
+}
+layer {
+  name: "Mconv6_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv5_stage6_L1"
+  top: "Mconv6_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage6_L1"
+  type: "ReLU"
+  bottom: "Mconv6_stage6_L1"
+  top: "Mconv6_stage6_L1"
+}
+layer {
+  name: "Mconv6_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv5_stage6_L2"
+  top: "Mconv6_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mrelu6_stage6_L2"
+  type: "ReLU"
+  bottom: "Mconv6_stage6_L2"
+  top: "Mconv6_stage6_L2"
+}
+layer {
+  name: "Mconv7_stage6_L1"
+  type: "Convolution"
+  bottom: "Mconv6_stage6_L1"
+  top: "Mconv7_stage6_L1"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 38
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "Mconv7_stage6_L2"
+  type: "Convolution"
+  bottom: "Mconv6_stage6_L2"
+  top: "Mconv7_stage6_L2"
+  param {
+    lr_mult: 4.0
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 8.0
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 19
+    pad: 0
+    kernel_size: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+    }
+  }
+}
+layer {
+  name: "concat_stage7"
+  type: "Concat"
+  bottom: "Mconv7_stage6_L2"
+  bottom: "Mconv7_stage6_L1"
+  # top: "concat_stage7"
+  top: "net_output"
+  concat_param {
+    axis: 1
+  }
+}
diff --git a/pose.sh b/pose.sh
new file mode 100755
index 000000000..14f3a4deb
--- /dev/null
+++ b/pose.sh
@@ -0,0 +1 @@
+./build/examples/openpose/openpose.bin -camera_resolution 640x480 -net_resolution 128x96
diff --git a/src/openpose/core/netTensorRT.cpp b/src/openpose/core/netTensorRT.cpp
new file mode 100644
index 000000000..ed93b3662
--- /dev/null
+++ b/src/openpose/core/netTensorRT.cpp
@@ -0,0 +1,391 @@
+#include <numeric> // std::accumulate
+#ifdef USE_TENSORRT
+    #include <atomic>
+    #include <mutex>
+    #include <caffe/net.hpp>
+    #include <glog/logging.h> // google::InitGoogleLogging
+#endif
+#include <openpose/utilities/cuda.hpp>
+#include <openpose/utilities/fileSystem.hpp>
+#include <openpose/utilities/standard.hpp>
+#include <openpose/core/netTensorRT.hpp>
+
+//#include <assert.h>
+//#include <fstream>
+//#include <sstream>
+//#include <iostream>
+//#include <cmath>
+//#include <sys/stat.h>
+//#include <cmath>
+//#include <time.h>
+//#include <cuda_runtime_api.h>
+//#include <algorithm>
+//#include <chrono>
+//#include <string.h>
+//#include <map>
+//#include <random>
+#include <boost/make_shared.hpp>
+
+
+#ifdef USE_TENSORRT
+    #include "NvInfer.h"
+    #include "NvCaffeParser.h"
+
+    using namespace nvinfer1;
+    using namespace nvcaffeparser1;
+
+    std::vector<std::string> gInputs;
+    std::map<std::string, DimsCHW> gInputDimensions;
+
+    // Logger for GIE info/warning/errors. It derives from nvinfer1::ILogger, so it must stay inside this guard.
+    class Logger : public ILogger
+    {
+        void log(Severity severity, const char* msg) override
+        {
+            // if suppress info-level message:  if (severity != Severity::kINFO)
+            std::cout << msg << std::endl;
+        }
+    } gLogger;
+#endif // USE_TENSORRT
+
+namespace op
+{
+    std::mutex sMutexNetTensorRT;
+    std::atomic<bool> sGoogleLoggingInitializedTensorRT{false}; // Already defined in netCaffe
+    
+    struct NetTensorRT::ImplNetTensorRT
+    { // Private implementation state of NetTensorRT; it only has members when USE_TENSORRT is defined
+        #ifdef USE_TENSORRT
+            // Init with constructor
+            const int mGpuId;
+            const std::string mCaffeProto;
+            const std::string mCaffeTrainedModel;
+            const std::string mLastBlobName;
+            std::vector<int> mNetInputSize4D;
+            // Init with thread
+            boost::shared_ptr<caffe::Blob<float>> spInputBlob;
+            boost::shared_ptr<caffe::Blob<float>> spOutputBlob;
+
+            // Output shape {1, C, H, W}; it is filled by initializationOnThread(), not by the constructor
+            //const std::array<int, 4> mNetInputSize4D;
+            std::vector<int> mNetOutputSize4D;
+
+            // TensorRT handles. They are only assigned in initializationOnThread(), so they start as nullptr:
+            // the destructor tests cudaEngine and must never read an indeterminate pointer.
+            nvinfer1::ICudaEngine* cudaEngine{nullptr};
+            nvinfer1::IExecutionContext* cudaContext{nullptr};
+            //nvinfer1::ICudaEngine* caffeToGIEModel();
+            //nvinfer1::ICudaEngine* createEngine();
+            cudaStream_t stream;
+            cudaEvent_t start, end;
+            // Stores the model paths, checks that both files exist and initializes Google logging once if requested
+            ImplNetTensorRT(const std::string& caffeProto, const std::string& caffeTrainedModel, const int gpuId,
+                         const bool enableGoogleLogging, const std::string& lastBlobName) :
+                mGpuId{gpuId},
+                mCaffeProto{caffeProto + std::string("_368x656")}, // TODO, no size, how to proceed ?
+                mCaffeTrainedModel{caffeTrainedModel},
+                mLastBlobName{lastBlobName}
+            {
+                const std::string message{".\nPossible causes:\n\t1. Not downloading the OpenPose trained models."
+                    "\n\t2. Not running OpenPose from the same directory where the `model`"
+                    " folder is located.\n\t3. Using paths with spaces."};
+                if (!existFile(mCaffeProto))
+                    error("Prototxt file not found: " + mCaffeProto + message, __LINE__, __FUNCTION__, __FILE__);
+                if (!existFile(mCaffeTrainedModel))
+                    error("Caffe trained model file not found: " + mCaffeTrainedModel + message,
+                          __LINE__, __FUNCTION__, __FILE__);
+                // Double if condition in order to speed up the program if it is called several times
+                if (enableGoogleLogging && !sGoogleLoggingInitializedTensorRT)
+                {
+                    std::lock_guard<std::mutex> lock{sMutexNetTensorRT};
+                    if (enableGoogleLogging && !sGoogleLoggingInitializedTensorRT)
+                    {
+                        google::InitGoogleLogging("OpenPose");
+                        sGoogleLoggingInitializedTensorRT = true;
+                    }
+                }
+            }
+        #endif
+    };
+    
+    
+#ifdef USE_TENSORRT
+    ICudaEngine* NetTensorRT::caffeToGIEModel()
+    {
+        // Builds a TensorRT engine from the Caffe files named in upImpl; returns nullptr on any failure.
+        IBuilder* builder = createInferBuilder(gLogger);
+        INetworkDefinition* network = builder->createNetwork();
+        ICaffeParser* parser = createCaffeParser();
+        // Every exit path must release these three objects (the early returns used to leak them)
+        const auto destroyBuilderObjects = [&]()
+        {
+            parser->destroy();
+            network->destroy();
+            builder->destroy();
+        };
+        // parse the caffe model to populate the network, then set the outputs
+        const IBlobNameToTensor* blobNameToTensor = parser->parse(upImpl->mCaffeProto.c_str(),
+                                                                  upImpl->mCaffeTrainedModel.c_str(),
+                                                                  *network,
+                                                                  DataType::kFLOAT);
+        if (!blobNameToTensor)
+        {
+            destroyBuilderObjects();
+            return nullptr;
+        }
+
+        for (int i = 0, n = network->getNbInputs(); i < n; i++)
+        {
+            DimsCHW dims = static_cast<DimsCHW&&>(network->getInput(i)->getDimensions());
+            gInputs.push_back(network->getInput(i)->getName());
+            gInputDimensions.insert(std::make_pair(network->getInput(i)->getName(), dims));
+            std::cout << "Input \"" << network->getInput(i)->getName() << "\": " << dims.c() << "x" << dims.h() << "x" << dims.w() << std::endl;
+            if( i > 0)
+                std::cerr << "Multiple inputs unsupported for now!";
+        }
+
+        // Specify which tensor is output (multiple unsupported); the blob is looked up only once
+        auto* const outputTensor = blobNameToTensor->find(upImpl->mLastBlobName.c_str());
+        if (outputTensor == nullptr)
+        {
+            std::cout << "could not find output blob " << upImpl->mLastBlobName.c_str() << std::endl;
+            destroyBuilderObjects();
+            return nullptr;
+        }
+        network->markOutput(*outputTensor);
+        for (int i = 0, n = network->getNbOutputs(); i < n; i++)
+        {
+            DimsCHW dims = static_cast<DimsCHW&&>(network->getOutput(i)->getDimensions());
+            std::cout << "Output \"" << network->getOutput(i)->getName() << "\": " << dims.c() << "x" << dims.h() << "x" << dims.w() << std::endl;
+        }
+
+        // Build the engine: batch size 1, half2 off, 32 MiB workspace (giexec defaults to 16; untuned for Jetson)
+        builder->setMaxBatchSize(1);
+        builder->setMaxWorkspaceSize(32<<20);
+        builder->setHalf2Mode(false);
+        ICudaEngine* engine = builder->buildCudaEngine(*network);
+        if (engine == nullptr)
+            std::cout << "could not build engine" << std::endl;
+        destroyBuilderObjects();
+        shutdownProtobufLibrary();
+        return engine;
+    }
+
+    ICudaEngine* NetTensorRT::createEngine()
+    {
+        // Returns the engine for upImpl->mCaffeProto: deserialized from the "<prototxt>.bin" cache when that
+        // file exists, otherwise built from the Caffe model and written to the cache. nullptr on failure.
+        const std::string serializedEnginePath = upImpl->mCaffeProto + ".bin";
+
+        std::cout << "Serialized engine path: " << serializedEnginePath.c_str() << std::endl;
+        if (existFile(serializedEnginePath))
+        {
+            std::cout << "Found serialized TensorRT engine, deserializing..." << std::endl;
+            std::ifstream file(serializedEnginePath, std::ios::binary);
+            file.seekg(0, file.end);
+            const std::streamoff length = file.tellg(); // -1 if the file could not be opened
+            file.seekg(0, file.beg);
+
+            // std::vector owns the buffer, so it is released on every path. An unreadable or empty file is
+            // rejected here; before, a null buffer of size 0 was passed on to deserializeCudaEngine().
+            std::vector<char> gieModelStream(length > 0 ? static_cast<size_t>(length) : 0);
+            if (gieModelStream.empty() || !file.read(gieModelStream.data(), gieModelStream.size()))
+            {
+                std::cerr << "could not read serialized engine" << std::endl;
+                return nullptr;
+            }
+            file.close();
+
+            // NOTE(review): the IRuntime is never destroyed; confirm whether it has to outlive the engine
+            IRuntime* infer = createInferRuntime(gLogger);
+            return infer->deserializeCudaEngine(gieModelStream.data(), gieModelStream.size(), nullptr);
+        }
+
+        ICudaEngine* engine = caffeToGIEModel();
+        if (!engine)
+        {
+            std::cerr << "Engine could not be created" << std::endl;
+            return nullptr;
+        }
+
+        // Cache the engine on disk. The plan is raw bytes, hence binary mode. A cache failure is not fatal:
+        // the freshly built engine is returned either way, but nothing is written to a stream that failed.
+        std::ofstream p(serializedEnginePath, std::ios::binary);
+        if (!p)
+        {
+            std::cerr << "could not serialize engine" << std::endl;
+            return engine;
+        }
+
+        IHostMemory *ptr = engine->serialize();
+        assert(ptr);
+        p.write(reinterpret_cast<const char*>(ptr->data()), ptr->size());
+        ptr->destroy();
+
+        return engine;
+    }
+    
+    inline void reshapeNetTensorRT(boost::shared_ptr<caffe::Blob<float>> inputBlob, const std::vector<int>& dimensions)
+    { // Resizes inputBlob to `dimensions`, then runs cudaCheck; any std::exception is passed on to error()
+        try
+        {
+            inputBlob->Reshape(dimensions);
+            // Only the blob is resized here, not the engine. caffeNet->Reshape(); TODO find TensorRT equivalent
+            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+        }
+    }
+#endif
+    
+    NetTensorRT::NetTensorRT(const std::string& caffeProto, const std::string& caffeTrainedModel, const int gpuId,
+                       const bool enableGoogleLogging, const std::string& lastBlobName)
+#ifdef USE_TENSORRT
+    : upImpl{new ImplNetTensorRT{caffeProto, caffeTrainedModel, gpuId, enableGoogleLogging,
+        lastBlobName}}
+#endif
+    { // With TensorRT: creates the CUDA stream and the two timing events. Without it: reports an error.
+        try
+        {
+            #ifdef USE_TENSORRT
+                std::cout << "Caffe file: " << upImpl->mCaffeProto.c_str() << std::endl;
+                CUDA_CHECK(cudaStreamCreate(&upImpl->stream));
+                CUDA_CHECK(cudaEventCreate(&upImpl->start));
+                CUDA_CHECK(cudaEventCreate(&upImpl->end));
+            #else
+                UNUSED(enableGoogleLogging); // was `netInputSize4D`, which is not a parameter of this constructor
+                UNUSED(caffeProto);
+                UNUSED(caffeTrainedModel);
+                UNUSED(gpuId);
+                UNUSED(lastBlobName);
+                error("OpenPose must be compiled with the `USE_TENSORRT` macro definition in order to use this"
+                      " functionality.", __LINE__, __FUNCTION__, __FILE__);
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+        }
+    }
+    
+    NetTensorRT::~NetTensorRT()
+    {
+        cudaStreamDestroy(upImpl->stream);
+        cudaEventDestroy(upImpl->start);
+        cudaEventDestroy(upImpl->end);
+        // The context is assigned right after the engine, so it is only read (and released first) if the engine exists
+        if (upImpl->cudaEngine && upImpl->cudaContext) upImpl->cudaContext->destroy();
+        if (upImpl->cudaEngine) upImpl->cudaEngine->destroy();
+    }
+    
+    void NetTensorRT::initializationOnThread()
+    { // Per-thread setup: selects the GPU, obtains the engine and execution context, allocates the I/O blobs
+        std::cout << "InitializationOnThread : start" << std::endl;
+        try
+        {
+            #ifdef USE_TENSORRT
+                std::cout << "InitializationOnThread : setting device" << std::endl;
+                // Initialize net
+                cudaSetDevice(upImpl->mGpuId);
+            
+                std::cout << "InitializationOnThread : creating engine" << std::endl;
+                // createEngine() loads the cached "<prototxt>.bin" engine or builds one from the Caffe model
+                upImpl->cudaEngine = createEngine();
+                if (!upImpl->cudaEngine)
+                {
+                    std::cerr << "cudaEngine could not be created" << std::endl;
+                    return; // NOTE(review): the failure is only logged here, error() is not called
+                }
+            
+                std::cout << "InitializationOnThread Pass : creating execution context" << std::endl;
+            
+                upImpl->cudaContext = upImpl->cudaEngine->createExecutionContext();
+                if (!upImpl->cudaContext)
+                {
+                    std::cerr << "cudaContext could not be created" << std::endl;
+                    return; // NOTE(review): same as above, the blobs stay unallocated on this path
+                }
+                DimsCHW outputDims = static_cast<DimsCHW&&>(upImpl->cudaEngine->getBindingDimensions(upImpl->cudaEngine->getNbBindings() - 1));
+                upImpl->mNetOutputSize4D.push_back(1);
+                upImpl->mNetOutputSize4D.push_back(outputDims.c());           
+                upImpl->mNetOutputSize4D.push_back(outputDims.h());
+                upImpl->mNetOutputSize4D.push_back(outputDims.w()); 
+                // The output shape {1, C, H, W} stored above is read from the engine's last binding
+                //std::cout << "NetInputSize4D: " << upImpl->mNetInputSize4D.at(0) << " " << upImpl->mNetInputSize4D.at(1) << " " << upImpl->mNetInputSize4D.at(2) << " " << upImpl->mNetInputSize4D.at(3) << std::endl;
+                // The input blob is hard-coded to 1x3x368x656; the output blob uses the shape stored above
+                upImpl->spInputBlob = boost::make_shared<caffe::Blob<float>>(1, 3, 368, 656);
+                upImpl->spOutputBlob = boost::make_shared<caffe::Blob<float>>(upImpl->mNetOutputSize4D[0], upImpl->mNetOutputSize4D[1], upImpl->mNetOutputSize4D[2], upImpl->mNetOutputSize4D[3]);
+            
+                std::cout << "InitializationOnThread : done" << std::endl;
+                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+        }
+    }
+    
+    void NetTensorRT::forwardPass(const Array<float>& inputData) const
+    {
+        try
+        {
+            #ifdef USE_TENSORRT
+            // Uploads inputData into the input blob and enqueues one TensorRT inference on upImpl->stream
+            // Security checks
+            if (inputData.empty())
+                error("The Array inputData cannot be empty.", __LINE__, __FUNCTION__, __FILE__);
+            if (inputData.getNumberDimensions() != 4 || inputData.getSize(1) != 3)
+                error("The Array inputData must have 4 dimensions: [batch size, 3 (RGB), height, width].",
+                      __LINE__, __FUNCTION__, __FILE__);
+            // initializationOnThread() allocates both blobs as its last step: null means it failed or never ran
+            if (!upImpl->spInputBlob || !upImpl->spOutputBlob)
+                error("NetTensorRT is not initialized: initializationOnThread() must succeed before forwardPass().", __LINE__, __FUNCTION__, __FILE__);
+            // Reshape Tensor RT net if required
+            if (!vectorsAreEqual(upImpl->mNetInputSize4D, inputData.getSize()))
+            {
+                std::cout << "Reshaping Tensor RT Net : WARNING NOT TESTED, probably won't work" << std::endl;
+                upImpl->mNetInputSize4D = inputData.getSize();
+                reshapeNetTensorRT(upImpl->spInputBlob, inputData.getSize());
+            }
+            // Copy frame data to GPU memory
+            auto* gpuImagePtr = upImpl->spInputBlob->mutable_gpu_data();
+            CUDA_CHECK(cudaMemcpy(gpuImagePtr, inputData.getConstPtr(), inputData.getVolume() * sizeof(float), cudaMemcpyHostToDevice));
+            // input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings(),
+            // of these, but in this case we know that there is exactly one input and one output.
+            std::vector<void*> buffers(2);
+            buffers[0] = gpuImagePtr;
+            buffers[1] = upImpl->spOutputBlob->mutable_gpu_data();
+            // Perform deep network forward pass; enqueue() signals a failed launch through its return value
+            if (!upImpl->cudaContext->enqueue(inputData.getSize(0), &buffers[0], upImpl->stream, nullptr))
+                error("TensorRT could not enqueue the forward pass.", __LINE__, __FUNCTION__, __FILE__);
+            // Cuda checks
+            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+        }
+    }
+    
+    boost::shared_ptr<caffe::Blob<float>> NetTensorRT::getOutputBlob() const
+    { // Returns the blob that forwardPass() hands to the engine as its output buffer
+        try
+        {
+            #ifdef USE_TENSORRT
+                return upImpl->spOutputBlob; // null until initializationOnThread() has allocated it
+            #else
+                return nullptr; // built without TensorRT: there is no output blob
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            return nullptr;
+        }
+    }
+}
+    
diff --git a/src/openpose/pose/poseExtractorCaffe.cpp b/src/openpose/pose/poseExtractorCaffe.cpp
index 02397ce97..fc64e2a11 100644
--- a/src/openpose/pose/poseExtractorCaffe.cpp
+++ b/src/openpose/pose/poseExtractorCaffe.cpp
@@ -13,6 +13,7 @@
 #include <openpose/utilities/standard.hpp>
 #include <openpose/pose/poseExtractorCaffe.hpp>
 
+
 namespace op
 {
     struct PoseExtractorCaffe::ImplPoseExtractorCaffe
@@ -251,7 +252,8 @@ namespace op
                                                                {upImpl->spHeatMapsBlob.get()});
                     cudaCheck(__LINE__, __FUNCTION__, __FILE__);
                 #else
-                    error("ResizeAndMergeCaffe CPU version not implemented yet.", __LINE__, __FUNCTION__, __FILE__);
+                    upImpl->spResizeAndMergeCaffe->Forward_cpu({upImpl->spCaffeNetOutputBlob.get()},
+                                                               {upImpl->spHeatMapsBlob.get()});
                 #endif
 
                 // 3. Get peaks by Non-Maximum Suppression
diff --git a/src/openpose/pose/poseExtractorTensorRT.cpp b/src/openpose/pose/poseExtractorTensorRT.cpp
new file mode 100644
index 000000000..c61d17dd6
--- /dev/null
+++ b/src/openpose/pose/poseExtractorTensorRT.cpp
@@ -0,0 +1,414 @@
+#ifdef USE_CAFFE
+    #include <caffe/blob.hpp>
+#endif
+#include <openpose/core/netTensorRT.hpp>
+#include <openpose/core/nmsCaffe.hpp>
+#include <openpose/core/resizeAndMergeCaffe.hpp>
+#include <openpose/pose/bodyPartConnectorCaffe.hpp>
+#include <openpose/pose/poseParameters.hpp>
+#include <openpose/utilities/check.hpp>
+#include <openpose/utilities/cuda.hpp>
+#include <openpose/utilities/fastMath.hpp>
+#include <openpose/utilities/openCv.hpp>
+#include <openpose/utilities/standard.hpp>
+#include <openpose/pose/poseExtractorTensorRT.hpp>
+
+using OpTimings = std::vector<std::pair<std::string, std::chrono::high_resolution_clock::time_point>>;
+// File-level list of (label, instant) checkpoints: filled by timeNow(), reported by forwardPass().
+static OpTimings timings;
+
+// Records a checkpoint: appends (label, current high_resolution_clock time) to `timings`.
+static void timeNow(const std::string& label)
+{
+    timings.emplace_back(label, std::chrono::high_resolution_clock::now());
+}
+
+static std::string timeDiffToString(const std::chrono::high_resolution_clock::time_point& t1,
+                                    const std::chrono::high_resolution_clock::time_point& t2) { // "<(t1 - t2) in milliseconds> ms"
+    return std::to_string(std::chrono::duration<double>(t1 - t2).count() * 1e3) + " ms";
+}
+
+
+namespace op
+{
+
+    struct PoseExtractorTensorRT::ImplPoseExtractorTensorRT // State of PoseExtractorTensorRT, reached through upImpl
+    {
+        #ifdef USE_TENSORRT // NOTE(review): members use caffe::Blob, so this presumably also requires USE_CAFFE -- confirm
+            const PoseModel mPoseModel;
+            const int mGpuId;
+            const std::string mModelFolder;
+            const bool mEnableGoogleLogging;
+            // General parameters
+            std::vector<std::shared_ptr<NetTensorRT>> spTensorRTNets; // grown by addTensorRTNetOnThread, indexed per input in forwardPass
+            std::shared_ptr<ResizeAndMergeCaffe<float>> spResizeAndMergeCaffe;
+            std::shared_ptr<NmsCaffe<float>> spNmsCaffe;
+            std::shared_ptr<BodyPartConnectorCaffe<float>> spBodyPartConnectorCaffe;
+            std::vector<std::vector<int>> mNetInput4DSizes; // last input size seen per net; forwardPass reshapes when it changes
+            std::vector<double> mScaleInputToNetInputs; // only referenced from commented-out code in forwardPass
+            // Init with thread
+            std::vector<boost::shared_ptr<caffe::Blob<float>>> spTensorRTNetOutputBlobs; // one entry per net in spTensorRTNets
+            std::shared_ptr<caffe::Blob<float>> spHeatMapsBlob;
+            std::shared_ptr<caffe::Blob<float>> spPeaksBlob;
+            std::shared_ptr<caffe::Blob<float>> spPoseBlob;
+
+            ImplPoseExtractorTensorRT(const PoseModel poseModel, const int gpuId,
+                                      const std::string& modelFolder, const bool enableGoogleLogging) :
+                mPoseModel{poseModel},
+                mGpuId{gpuId},
+                mModelFolder{modelFolder},
+                mEnableGoogleLogging{enableGoogleLogging},
+                spResizeAndMergeCaffe{std::make_shared<ResizeAndMergeCaffe<float>>()},
+                spNmsCaffe{std::make_shared<NmsCaffe<float>>()},
+                spBodyPartConnectorCaffe{std::make_shared<BodyPartConnectorCaffe<float>>()}
+            { // nets and blobs are not created here; see netInitializationOnThread
+            }
+        #endif
+    };
+
+    #ifdef USE_CAFFE
+        // Returns the raw (non-owning) pointer held by each shared blob, in the same order.
+        std::vector<caffe::Blob<float>*> tensorRTNetSharedToPtr(std::vector<boost::shared_ptr<caffe::Blob<float>>>& caffeNetOutputBlob)
+        {
+            try
+            {
+                std::vector<caffe::Blob<float>*> rawBlobPointers;
+                rawBlobPointers.reserve(caffeNetOutputBlob.size());
+                for (const auto& sharedBlob : caffeNetOutputBlob)
+                    rawBlobPointers.push_back(sharedBlob.get());
+                return rawBlobPointers;
+            }
+            catch (const std::exception& e)
+            {
+                error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+                return {};
+            }
+        }
+    
+        inline void reshapePoseExtractorCaffe(std::shared_ptr<ResizeAndMergeCaffe<float>>& resizeAndMergeCaffe,
+                                              std::shared_ptr<NmsCaffe<float>>& nmsCaffe,
+                                              std::shared_ptr<BodyPartConnectorCaffe<float>>& bodyPartConnectorCaffe,
+                                              std::vector<boost::shared_ptr<caffe::Blob<float>>>& caffeNetOutputBlob,
+                                              std::shared_ptr<caffe::Blob<float>>& heatMapsBlob,
+                                              std::shared_ptr<caffe::Blob<float>>& peaksBlob,
+                                              std::shared_ptr<caffe::Blob<float>>& poseBlob,
+                                              const float scaleInputToNetInput,
+                                              const PoseModel poseModel)
+        {
+            try
+            {
+                // 1. Resize + merge layer: every net output blob in, heat maps blob out
+                const auto netOutputPointers = tensorRTNetSharedToPtr(caffeNetOutputBlob);
+                const auto modelIndex = static_cast<int>(poseModel);
+                resizeAndMergeCaffe->Reshape(netOutputPointers, {heatMapsBlob.get()},
+                                             POSE_CCN_DECREASE_FACTOR[modelIndex], 1.f/scaleInputToNetInput);
+                // 2. NMS layer: heat maps blob in, peaks blob out
+                nmsCaffe->Reshape({heatMapsBlob.get()}, {peaksBlob.get()}, POSE_MAX_PEAKS[modelIndex]);
+                // 3. Body part connector layer: heat maps + peaks blobs in, pose blob out
+                bodyPartConnectorCaffe->Reshape({heatMapsBlob.get(), peaksBlob.get()}, {poseBlob.get()});
+    #ifdef USE_CUDA
+                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+    #endif
+            }
+            catch (const std::exception& e)
+            {
+                error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            }
+        }
+    
+        void addTensorRTNetOnThread(std::vector<std::shared_ptr<NetTensorRT>>& netTensorRT,
+                                 std::vector<boost::shared_ptr<caffe::Blob<float>>>& caffeNetOutputBlob,
+                                 const PoseModel poseModel, const int gpuId,
+                                 const std::string& modelFolder, const bool enableGoogleLogging)
+        {
+            try
+            {
+                // Create the net from the prototxt / trained-model file names of `poseModel` under `modelFolder`
+                const auto modelIndex = static_cast<int>(poseModel);
+                const auto prototxtPath = modelFolder + POSE_PROTOTXT[modelIndex];
+                const auto trainedModelPath = modelFolder + POSE_TRAINED_MODEL[modelIndex];
+                netTensorRT.push_back(std::make_shared<NetTensorRT>(prototxtPath, trainedModelPath, gpuId, enableGoogleLogging));
+                // Initialize it on the calling thread, then store its output blob at the matching index
+                const auto& newNet = netTensorRT.back();
+                newNet->initializationOnThread();
+                caffeNetOutputBlob.push_back(newNet->getOutputBlob());
+                // Security checks: both vectors must keep the same length
+                if (caffeNetOutputBlob.size() != netTensorRT.size())
+                    error("Weird error, this should not happen. Notify us.", __LINE__, __FUNCTION__, __FILE__);
+                // Cuda check
+    #ifdef USE_CUDA
+                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+    #endif
+            }
+            catch (const std::exception& e)
+            {
+                error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            }
+        }
+    #endif
+
+ 
+    PoseExtractorTensorRT::PoseExtractorTensorRT(const PoseModel poseModel, const std::string& modelFolder,
+                                                 const int gpuId, const std::vector<HeatMapType>& heatMapTypes,
+                                                 const ScaleMode heatMapScale, const bool enableGoogleLogging) :
+        PoseExtractor{poseModel, heatMapTypes, heatMapScale}
+        #ifdef USE_TENSORRT
+        , upImpl{new ImplPoseExtractorTensorRT{poseModel, gpuId, modelFolder, enableGoogleLogging}} // settings only; nets are added later
+        #endif
+    {
+        try
+        {
+            #ifdef USE_TENSORRT
+                // Layers parameters
+                upImpl->spBodyPartConnectorCaffe->setPoseModel(mPoseModel);
+            #else
+            UNUSED(enableGoogleLogging); // poseModel needs no marker: the base-class initializer above already uses it
+            UNUSED(modelFolder);
+            UNUSED(gpuId);
+            UNUSED(heatMapTypes);
+            UNUSED(heatMapScale);
+            error("OpenPose must be compiled with the `USE_TENSORRT` macro definition in order to use this"
+                  " functionality.", __LINE__, __FUNCTION__, __FILE__); // names the macro this branch is actually guarded by
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+        }
+    }
+
+    PoseExtractorTensorRT::~PoseExtractorTensorRT()
+    { // Intentionally empty: no explicit cleanup is performed here
+    }
+
+    void PoseExtractorTensorRT::netInitializationOnThread()
+    {
+        try
+        {
+            #ifdef USE_TENSORRT
+                auto& impl = *upImpl;
+                // Logging
+                log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
+                // Create and initialize the first TensorRT net together with its output blob
+                addTensorRTNetOnThread(impl.spTensorRTNets, impl.spTensorRTNetOutputBlobs, impl.mPoseModel,
+                                       impl.mGpuId, impl.mModelFolder, impl.mEnableGoogleLogging);
+
+                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+
+                // Allocate 1x1x1x1 placeholder blobs (forwardPass reshapes them through reshapePoseExtractorCaffe)
+                impl.spHeatMapsBlob = std::make_shared<caffe::Blob<float>>(1, 1, 1, 1);
+                impl.spPeaksBlob = std::make_shared<caffe::Blob<float>>(1, 1, 1, 1);
+                impl.spPoseBlob = std::make_shared<caffe::Blob<float>>(1, 1, 1, 1);
+
+                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+
+                // Logging
+                log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+        }
+    }
+
+    void PoseExtractorTensorRT::forwardPass(const std::vector<Array<float>>& inputNetData,     // one net input per scale
+                                            const Point<int>& inputDataSize,                   // used for mScaleNetToOutput
+                                            const std::vector<double>& scaleInputToNetInputs)  // one ratio per inputNetData entry
+    {
+        try
+        {
+            #ifdef USE_TENSORRT
+                // Security checks
+                if (inputNetData.empty())
+                    error("Empty inputNetData.", __LINE__, __FUNCTION__, __FILE__);
+                for (const auto& inputNetDataI : inputNetData)
+                    if (inputNetDataI.empty())
+                        error("Empty inputNetData.", __LINE__, __FUNCTION__, __FILE__);
+                if (inputNetData.size() != scaleInputToNetInputs.size())
+                    error("Size(inputNetData) must be same than size(scaleInputToNetInputs).",
+                          __LINE__, __FUNCTION__, __FILE__);
+                // Checkpoints are per call: drop those of earlier calls so the report below covers only this pass
+                timings.clear();
+                timeNow("Start");
+                // Resize std::vectors if required
+                const auto numberScales = inputNetData.size();
+                upImpl->mNetInput4DSizes.resize(numberScales);
+                while (upImpl->spTensorRTNets.size() < numberScales)
+                    addTensorRTNetOnThread(upImpl->spTensorRTNets, upImpl->spTensorRTNetOutputBlobs, upImpl->mPoseModel,
+                                        upImpl->mGpuId, upImpl->mModelFolder, false);
+            
+                // Process each image
+                for (auto i = 0u ; i < inputNetData.size(); i++)
+                {
+                    // 1. TensorRT deep network
+                    upImpl->spTensorRTNets.at(i)->forwardPass(inputNetData[i]);
+                    
+                    // Reshape blobs if required
+                    // Note: In order to resize to input size to have same results as Matlab, uncomment the commented
+                    // lines
+                    if (!vectorsAreEqual(upImpl->mNetInput4DSizes.at(i), inputNetData[i].getSize()))
+                        // || !vectorsAreEqual(upImpl->mScaleInputToNetInputs, scaleInputToNetInputs))
+                    {
+                        upImpl->mNetInput4DSizes.at(i) = inputNetData[i].getSize();
+                        mNetOutputSize = Point<int>{upImpl->mNetInput4DSizes[0][3],
+                            upImpl->mNetInput4DSizes[0][2]};
+                        // upImpl->mScaleInputToNetInputs = scaleInputToNetInputs;
+                        reshapePoseExtractorCaffe(upImpl->spResizeAndMergeCaffe, upImpl->spNmsCaffe,
+                                                  upImpl->spBodyPartConnectorCaffe, upImpl->spTensorRTNetOutputBlobs,
+                                                  upImpl->spHeatMapsBlob, upImpl->spPeaksBlob, upImpl->spPoseBlob,
+                                                  1.f, mPoseModel);
+                        // scaleInputToNetInputs[i], mPoseModel);
+                    }
+                }
+            
+                timeNow("TensorRT forwards");
+            
+                // 2. Resize heat maps + merge different scales
+                const auto caffeNetOutputBlobs = tensorRTNetSharedToPtr(upImpl->spTensorRTNetOutputBlobs);
+                const std::vector<float> floatScaleRatios(scaleInputToNetInputs.begin(), scaleInputToNetInputs.end());
+                upImpl->spResizeAndMergeCaffe->setScaleRatios(floatScaleRatios);
+            
+                upImpl->spResizeAndMergeCaffe->Forward_gpu(caffeNetOutputBlobs,                             // ~5ms
+                                                           {upImpl->spHeatMapsBlob.get()});
+                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+            
+                timeNow("Resize heat Maps");
+            
+                // 3. Get peaks by Non-Maximum Suppression
+                upImpl->spNmsCaffe->setThreshold((float)get(PoseProperty::NMSThreshold));
+                #ifdef USE_CUDA
+                upImpl->spNmsCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});// ~2ms
+                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+                #else
+                error("NmsCaffe CPU version not implemented yet.", __LINE__, __FUNCTION__, __FILE__);
+                #endif
+            
+                timeNow("Peaks by nms");
+            
+                // Get scale net to output (i.e. image input)
+                // Note: In order to resize to input size, (un)comment the following lines
+                const auto scaleProducerToNetInput = resizeGetScaleFactor(inputDataSize, mNetOutputSize);
+                const Point<int> netSize{intRound(scaleProducerToNetInput*inputDataSize.x),
+                    intRound(scaleProducerToNetInput*inputDataSize.y)};
+                mScaleNetToOutput = {(float)resizeGetScaleFactor(netSize, inputDataSize)};
+                // mScaleNetToOutput = 1.f;
+            
+                timeNow("Scale net to output");
+            
+                // 4. Connecting body parts
+                // Get scale net to output (i.e. image input)
+                upImpl->spBodyPartConnectorCaffe->setScaleNetToOutput(mScaleNetToOutput);
+                upImpl->spBodyPartConnectorCaffe->setInterMinAboveThreshold(
+                                                                            (float)get(PoseProperty::ConnectInterMinAboveThreshold)
+                                                                            );
+                upImpl->spBodyPartConnectorCaffe->setInterThreshold((float)get(PoseProperty::ConnectInterThreshold));
+                upImpl->spBodyPartConnectorCaffe->setMinSubsetCnt((int)get(PoseProperty::ConnectMinSubsetCnt));
+                upImpl->spBodyPartConnectorCaffe->setMinSubsetScore((float)get(PoseProperty::ConnectMinSubsetScore));
+            
+                // GPU version not implemented yet
+                // #ifdef USE_CUDA
+                //     upImpl->spBodyPartConnectorCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get(),
+                //                                                    upImpl->spPeaksBlob.get()},
+                //                                                   {upImpl->spPoseBlob.get()}, mPoseKeypoints);
+                // #else
+                upImpl->spBodyPartConnectorCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get(),
+                    upImpl->spPeaksBlob.get()},
+                                                              mPoseKeypoints, mPoseScores);
+                // #endif
+            
+                timeNow("Connect Body Parts");
+                // Report: total is last checkpoint minus "Start"; timeDiffToString already appends the " ms" unit
+                const auto totalTime = timeDiffToString(timings.back().second, timings.front().second);
+                const auto message = "Pose estimation successfully finished. Total time: " + totalTime + ".";
+                op::log(message, op::Priority::High);
+
+                for(OpTimings::iterator timing = timings.begin()+1; timing != timings.end(); ++timing) {
+                  const auto log_time = timing->first + " - " + timeDiffToString(timing->second, (timing-1)->second);
+                  op::log(log_time, op::Priority::High);
+                }
+            #else
+                UNUSED(inputNetData);
+                UNUSED(inputDataSize);
+                UNUSED(scaleInputToNetInputs);
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+        }
+    }
+
+    const float* PoseExtractorTensorRT::getHeatMapCpuConstPtr() const // Read-only CPU pointer to the heat maps blob data
+    {
+        try
+        {    
+            #ifdef USE_TENSORRT
+                checkThread();
+                return upImpl->spHeatMapsBlob->cpu_data();
+            #else
+                return nullptr; // built without USE_TENSORRT: no heat maps exist
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            return nullptr; // only reached if error() returns instead of throwing
+        }
+    }
+
+    const float* PoseExtractorTensorRT::getHeatMapGpuConstPtr() const // Read-only GPU pointer to the heat maps blob data
+    {
+        try
+        {
+            #ifdef USE_TENSORRT
+                checkThread();
+                return upImpl->spHeatMapsBlob->gpu_data();
+            #else
+                return nullptr; // built without USE_TENSORRT: no heat maps exist
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            return nullptr; // only reached if error() returns instead of throwing
+        }
+    }
+
+    std::vector<int> PoseExtractorTensorRT::getHeatMapSize() const // Shape vector of the heat maps blob
+    {
+        try
+        {
+            #ifdef USE_TENSORRT
+                checkThread();
+                return upImpl->spHeatMapsBlob->shape();
+            #else
+                return {}; // built without USE_TENSORRT: empty shape
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            return {}; // only reached if error() returns instead of throwing
+        }
+    }
+
+    const float* PoseExtractorTensorRT::getPoseGpuConstPtr() const // GPU pointer to the pose blob data (reported as not implemented)
+    {
+        try
+        {
+            #ifdef USE_TENSORRT
+                error("GPU pointer for people pose data not implemented yet.", __LINE__, __FUNCTION__, __FILE__);
+                checkThread(); // NOTE(review): this and the return below only run if error() returns -- confirm error() semantics
+                return upImpl->spPoseBlob->gpu_data();
+            #else
+                return nullptr; // built without USE_TENSORRT: no pose blob exists
+            #endif
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            return nullptr; // only reached if error() returns instead of throwing
+        }
+    }
+}
diff --git a/ubuntu/Makefile.config.Ubuntu16_cuda8_JetsonTX2 b/ubuntu/Makefile.config.Ubuntu16_cuda8_JetsonTX2
index 476fc9f92..d3241f886 100644
--- a/ubuntu/Makefile.config.Ubuntu16_cuda8_JetsonTX2
+++ b/ubuntu/Makefile.config.Ubuntu16_cuda8_JetsonTX2
@@ -53,7 +53,7 @@ CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
 
 # DEEP_NET choice:
 # caffe for Caffe (default and only option so far)
-DEEP_NET := caffe
+DEEP_NET := tensorrt
 
 # Caffe directory
 CAFFE_DIR := 3rdparty/caffe/distribute
diff --git a/ubuntu/install_openpose_JetsonTX2_JetPack3.1.sh b/ubuntu/install_openpose_JetsonTX2_JetPack3.1.sh
index 57d71638e..7387e3bba 100755
--- a/ubuntu/install_openpose_JetsonTX2_JetPack3.1.sh
+++ b/ubuntu/install_openpose_JetsonTX2_JetPack3.1.sh
@@ -51,7 +51,6 @@ echo ""
 
 echo "------------------------- Compiling OpenPose -------------------------"
 # Go back to main folder
-cd ..
 # Copy Makefile.config
 cp ubuntu/Makefile.config.Ubuntu16_cuda8_JetsonTX2 Makefile.config
 # Compile OpenPose