feat(ml): torch image basic data augmentation

beniz · mergify[bot] · commit b9f85251380c · 2021-01-21T15:13:39.000Z
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -101,6 +101,7 @@ if (USE_TORCH)
     backends/torch/torchmodule.cc
     backends/torch/torchutils.cc
     backends/torch/optim/ranger.cc
+    backends/torch/torchdataaug.cc
 	)
 endif()
 
diff --git a/src/backends/torch/torchdataaug.cc b/src/backends/torch/torchdataaug.cc
@@ -0,0 +1,132 @@
+/**
+ * DeepDetect
+ * Copyright (c) 2021 Jolibrain
+ * Authors: Emmanuel Benazera <emmanuel.benazera@jolibrain.com>
+ *
+ * This file is part of deepdetect.
+ *
+ * deepdetect is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * deepdetect is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with deepdetect.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "torchdataaug.h"
+
+namespace dd
+{
+
+  /**
+   * \brief applies every enabled augmentation to src, in place.
+   *
+   * Mirror and rotation are applied first; cutout and then crop are applied
+   * last, in that order.
+   */
+  void TorchImgRandAugCV::augment(cv::Mat &src)
+  {
+    // flips and rotations
+    if (_mirror)
+      {
+        applyMirror(src);
+      }
+    if (_rotate)
+      {
+        applyRotate(src);
+      }
+
+    // erasing, then cropping: must remain last and in this order
+    if (_cutout > 0.0)
+      {
+        applyCutout(src);
+      }
+    if (_crop_size > 0)
+      {
+        applyCrop(src);
+      }
+  }
+
+  /**
+   * \brief flips src around its vertical axis (cv::flip with flipCode 1)
+   *        when the draw from _bernouilli succeeds.
+   *
+   * Only the draw from the member random generator is done inside the
+   * OpenMP critical section. The flip only touches the caller's image and
+   * runs outside of it, so that pixel work is not serialized by the lock
+   * (same pattern as applyRotate and applyCrop).
+   */
+  void TorchImgRandAugCV::applyMirror(cv::Mat &src)
+  {
+    bool mirror = false;
+#pragma omp critical
+    {
+      mirror = _bernouilli(_rnd_gen);
+    }
+    if (!mirror)
+      return;
+
+    cv::Mat dst;
+    cv::flip(src, dst, 1);
+    src = dst;
+  }
+
+  /**
+   * \brief rotates src, in place, by a random multiple of 90 degrees.
+   *
+   * The multiple is drawn from _uniform_int_rotate; any value other than
+   * 1, 2 or 3 leaves the image untouched.
+   */
+  void TorchImgRandAugCV::applyRotate(cv::Mat &src)
+  {
+    int quarter_turns = 0;
+#pragma omp critical
+    {
+      // draw from the member generator inside the critical section
+      quarter_turns = _uniform_int_rotate(_rnd_gen);
+    }
+
+    cv::Mat tmp;
+    switch (quarter_turns)
+      {
+      case 1: // 90: transpose, then flip around the vertical axis
+        cv::transpose(src, tmp);
+        cv::flip(tmp, src, 1);
+        break;
+      case 2: // 180: flip around both axes
+        cv::flip(src, tmp, -1);
+        src = tmp;
+        break;
+      case 3: // 270: transpose, then flip around the horizontal axis
+        cv::transpose(src, tmp);
+        cv::flip(tmp, src, 0);
+        break;
+      default: // 0: nothing to do
+        break;
+      }
+  }
+
+  /**
+   * \brief replaces src by a random _crop_size x _crop_size crop of itself.
+   *
+   * NOTE(review): the offsets are drawn from _uniform_int_crop_x/_y, which
+   * the constructor builds from _img_width/_img_height, so this presumably
+   * expects src to have exactly those dimensions -- confirm against callers.
+   */
+  void TorchImgRandAugCV::applyCrop(cv::Mat &src)
+  {
+    int off_x = 0;
+    int off_y = 0;
+#pragma omp critical
+    {
+      // draw the top-left corner from the member generator
+      off_x = _uniform_int_crop_x(_rnd_gen);
+      off_y = _uniform_int_crop_y(_rnd_gen);
+    }
+
+    // clone so that the result is a deep copy of the selected region
+    const cv::Rect roi(off_x, off_y, _crop_size, _crop_size);
+    src = src(roi).clone();
+  }
+
+  /**
+   * \brief with probability _cutout, overwrites a random rectangle of src
+   *        with uniform random pixel values (cv::randu between _cutout_vl
+   *        and _cutout_vh).
+   *
+   * The erased area is a proportion of _img_width * _img_height drawn from
+   * _uniform_real_cutout_s, with an aspect ratio drawn from
+   * _uniform_real_cutout_r; width and height are capped to the image size.
+   *
+   * Only the draws from the member random generator are done inside the
+   * OpenMP critical sections. The erase itself works on the caller's image
+   * and runs outside of them, so that pixel work is not serialized by the
+   * lock. The order of the draws on _rnd_gen is unchanged.
+   *
+   * NOTE(review): the rectangle is sized and placed from
+   * _img_width/_img_height, so this presumably expects src to have exactly
+   * those dimensions -- confirm against callers.
+   */
+  void TorchImgRandAugCV::applyCutout(cv::Mat &src)
+  {
+    // Draw random between 0 and 1
+    float r1 = 0.0;
+#pragma omp critical
+    {
+      r1 = _uniform_real_1(_rnd_gen);
+    }
+    if (r1 > _cutout)
+      return;
+
+    int w = 0;
+    int h = 0;
+    int rect_x = 0;
+    int rect_y = 0;
+#pragma omp critical
+    {
+      // get shape and area to erase
+      float s = _uniform_real_cutout_s(_rnd_gen) * _img_width
+                * _img_height;                    // area
+      float r = _uniform_real_cutout_r(_rnd_gen); // aspect ratio
+
+      w = std::min(_img_width,
+                   static_cast<int>(std::floor(std::sqrt(s / r))));
+      h = std::min(_img_height,
+                   static_cast<int>(std::floor(std::sqrt(s * r))));
+
+      // random top-left corner such that the rectangle stays in the image
+      std::uniform_int_distribution<int> distx(0, _img_width - w);
+      std::uniform_int_distribution<int> disty(0, _img_height - h);
+      rect_x = distx(_rnd_gen);
+      rect_y = disty(_rnd_gen);
+    }
+
+    // erase, outside of the critical section: selected_area is a view on
+    // src, so filling it modifies src in place
+    cv::Rect rect(rect_x, rect_y, w, h);
+    cv::Mat selected_area = src(rect);
+    cv::randu(selected_area, cv::Scalar(_cutout_vl, _cutout_vl, _cutout_vl),
+              cv::Scalar(_cutout_vh, _cutout_vh, _cutout_vh)); // TODO: bw
+  }
+}
diff --git a/src/backends/torch/torchdataaug.h b/src/backends/torch/torchdataaug.h
@@ -0,0 +1,102 @@
+/**
+ * DeepDetect
+ * Copyright (c) 2021 Jolibrain
+ * Authors: Emmanuel Benazera <emmanuel.benazera@jolibrain.com>
+ *
+ * This file is part of deepdetect.
+ *
+ * deepdetect is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * deepdetect is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with deepdetect.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TORCHDATAAUG_H
+#define TORCHDATAAUG_H
+
+#include <opencv2/opencv.hpp>
+#include <random>
+
+namespace dd
+{
+  /**
+   * \brief random image data augmentation applied with OpenCV: mirror,
+   *        rotation by multiples of 90 degrees, cutout (random erasing)
+   *        and random square crop.
+   */
+  class TorchImgRandAugCV
+  {
+  public:
+    /**
+     * \brief empty constructor, every augmentation is disabled
+     */
+    TorchImgRandAugCV()
+    {
+    }
+
+    /**
+     * \brief constructor
+     * @param img_width  width used to size crop offsets and cutout areas
+     * @param img_height height used to size crop offsets and cutout areas
+     * @param mirror     whether to randomly mirror images
+     * @param rotate     whether to randomly rotate images
+     * @param crop_size  side of the random square crop, disabled if <= 0
+     * @param cutout     probability of applying cutout, disabled if <= 0.
+     *                   Taken as a float so that a fractional probability
+     *                   is kept as is (a bool would turn 0.5 into 1.0)
+     */
+    TorchImgRandAugCV(const int &img_width, const int &img_height,
+                      const bool &mirror, const bool &rotate,
+                      const int &crop_size, const float &cutout)
+        : _img_width(img_width), _img_height(img_height), _mirror(mirror),
+          _rotate(rotate), _crop_size(crop_size), _cutout(cutout),
+          _uniform_real_1(0.0, 1.0), _bernouilli(0.5),
+          _uniform_int_rotate(0, 3)
+    {
+      if (_crop_size > 0)
+        {
+          // NOTE(review): std::uniform_int_distribution requires a <= b,
+          // i.e. crop_size must not exceed img_width nor img_height
+          _uniform_int_crop_x
+              = std::uniform_int_distribution<int>(0, _img_width - _crop_size);
+          _uniform_int_crop_y = std::uniform_int_distribution<int>(
+              0, _img_height - _crop_size);
+        }
+      if (_cutout > 0.0)
+        {
+          _uniform_real_cutout_s
+              = std::uniform_real_distribution<float>(_cutout_sl, _cutout_sh);
+          _uniform_real_cutout_r
+              = std::uniform_real_distribution<float>(_cutout_rl, _cutout_rh);
+        }
+    }
+
+    ~TorchImgRandAugCV()
+    {
+    }
+
+    /**
+     * \brief applies the enabled augmentations to src, in place
+     * @param src image to augment
+     */
+    void augment(cv::Mat &src);
+
+  protected:
+    void applyMirror(cv::Mat &src); /**< random mirroring. */
+    void applyRotate(cv::Mat &src); /**< random rotation. */
+    void applyCrop(cv::Mat &src);   /**< random square crop. */
+    void applyCutout(cv::Mat &src); /**< random erasing of a rectangle. */
+
+  private:
+    int _img_width = 224;
+    int _img_height = 224;
+
+    // augmentation options & parameter
+    bool _mirror = false;
+    bool _rotate = false;
+    int _crop_size = -1;     /**< crop side, no crop if <= 0. */
+    float _cutout = 0.0;     /**< cutout probability, no cutout if <= 0. */
+    float _cutout_sl = 0.02; /**< min proportion of erased area wrt image. */
+    float _cutout_sh = 0.4;  /**< max proportion of erased area wrt image. */
+    float _cutout_rl = 0.3;  /**< min aspect ratio of erased area. */
+    float _cutout_rh = 3.0;  /**< max aspect ratio of erased area. */
+    int _cutout_vl = 0;      /**< min erased area pixel value. */
+    int _cutout_vh = 255;    /**< max erased area pixel value. */
+
+    // random generators
+    std::default_random_engine _rnd_gen;
+    std::uniform_real_distribution<float>
+        _uniform_real_1; /**< random real uniform between 0 and 1. */
+    std::bernoulli_distribution _bernouilli;
+    std::uniform_int_distribution<int> _uniform_int_rotate;
+    std::uniform_int_distribution<int> _uniform_int_crop_x;
+    std::uniform_int_distribution<int> _uniform_int_crop_y;
+    std::uniform_real_distribution<float> _uniform_real_cutout_s;
+    std::uniform_real_distribution<float> _uniform_real_cutout_r;
+  };
+}
+
+#endif
diff --git a/src/backends/torch/torchdataset.cc b/src/backends/torch/torchdataset.cc
@@ -137,6 +137,7 @@ namespace dd
         _dbData = std::shared_ptr<db::DB>(db::GetDB(_backend));
         _dbData->Open(_dbFullName, db::NEW);
         _txn = std::shared_ptr<db::Transaction>(_dbData->NewTransaction());
+        _logger->info("Preparing db of {}x{} images", bgr.cols, bgr.rows);
       }
 
     // data & target keys
@@ -296,7 +297,7 @@ namespace dd
     std::vector<BatchToStack> data, target;
     bool first_iter = true;
 
-    if (!_db)
+    if (!_db) // Note: no data augmentation if no db
       {
         if (!_lfiles.empty()) // prefetch batch from file list
           {
@@ -428,6 +429,9 @@ namespace dd
                 torch::Tensor targett;
                 read_image_from_db(datas, targets, bgr, targett, inputc->_bw);
 
+                // data augmentation can apply here, with OpenCV
+                _img_rand_aug_cv.augment(bgr);
+
                 torch::Tensor imgt
                     = image_to_tensor(bgr, inputc->height(), inputc->width());
 
diff --git a/src/backends/torch/torchdataset.h b/src/backends/torch/torchdataset.h
@@ -33,6 +33,7 @@
 #include "backends/torch/db_lmdb.hpp"
 
 #include "inputconnectorstrategy.h"
+#include "torchdataaug.h"
 #include "torchutils.h"
 
 #include <opencv2/opencv.hpp>
@@ -79,7 +80,8 @@ namespace dd
         = nullptr;               /**< back ptr to input connector. */
     bool _classification = true; /**< whether a classification dataset. */
 
-    bool _image = false; /**< whether an image dataset. */
+    bool _image = false;                /**< whether an image dataset. */
+    TorchImgRandAugCV _img_rand_aug_cv; /**< image data augmentation policy. */
 
     /**
      * \brief empty constructor
@@ -98,7 +100,8 @@ namespace dd
           _logger(d._logger), _shuffle(d._shuffle), _dbData(d._dbData),
           _indices(d._indices), _lfiles(d._lfiles), _batches(d._batches),
           _dbFullName(d._dbFullName), _inputc(d._inputc),
-          _classification(d._classification), _image(d._image)
+          _classification(d._classification), _image(d._image),
+          _img_rand_aug_cv(d._img_rand_aug_cv)
     {
     }
 
diff --git a/src/backends/torch/torchinputconns.cc b/src/backends/torch/torchinputconns.cc
@@ -228,7 +228,7 @@ namespace dd
         if (shouldLoad)
           {
             if (_db)
-              _tilogger->info("Load from db");
+              _tilogger->info("Preparation for training from db");
             // Get files paths
             try
               {
diff --git a/src/backends/torch/torchlib.cc b/src/backends/torch/torchlib.cc
@@ -485,7 +485,35 @@ namespace dd
         throw;
       }
 
+    // TODO: set inputc dataset data augmentation options
     APIData ad_mllib = ad.getobj("parameters").getobj("mllib");
+    bool has_data_augmentation
+        = ad_mllib.has("mirror") || ad_mllib.has("rotate")
+          || ad_mllib.has("crop_size") || ad_mllib.has("cutout");
+    if (has_data_augmentation)
+      {
+        bool has_mirror
+            = ad_mllib.has("mirror") && ad_mllib.get("mirror").get<bool>();
+        this->_logger->info("mirror: {}", has_mirror);
+        bool has_rotate
+            = ad_mllib.has("rotate") && ad_mllib.get("rotate").get<bool>();
+        this->_logger->info("rotate: {}", has_rotate);
+        int crop_size = -1;
+        if (ad_mllib.has("crop_size"))
+          {
+            crop_size = ad_mllib.get("crop_size").get<int>();
+            this->_logger->info("crop_size : {}", crop_size);
+          }
+        float cutout = 0.0;
+        if (ad_mllib.has("cutout"))
+          {
+            cutout = ad_mllib.get("cutout").get<double>();
+            this->_logger->info("cutout: {}", cutout);
+          }
+        inputc._dataset._img_rand_aug_cv
+            = TorchImgRandAugCV(inputc.width(), inputc.height(), has_mirror,
+                                has_rotate, crop_size, cutout);
+      }
 
     // solver params
     int64_t iterations = 1;
@@ -610,7 +638,6 @@ namespace dd
 
         for (TorchBatch batch : *dataloader)
           {
-
             auto tstart = steady_clock::now();
             if (_masked_lm)
               {
diff --git a/tests/ut-torchapi.cc b/tests/ut-torchapi.cc
@@ -333,7 +333,7 @@ TEST(torchapi, service_train_images)
         "\"supervised\",\"model\":{\"repository\":\""
         + resnet50_train_repo
         + "\"},\"parameters\":{\"input\":{\"connector\":\"image\","
-          "\"width\":224,\"height\":224,\"db\":true},\"mllib\":{\"nclasses\":"
+          "\"width\":256,\"height\":256,\"db\":true},\"mllib\":{\"nclasses\":"
           "2,\"finetuning\":true,\"gpu\":true}}}";
   std::string joutstr = japi.jrender(japi.service_create(sname, jstr));
   ASSERT_EQ(created_str, joutstr);
@@ -345,7 +345,9 @@ TEST(torchapi, service_train_images)
         + iterations_resnet50 + ",\"base_lr\":" + torch_lr
         + ",\"iter_size\":4,\"solver_type\":\"ADAM\",\"test_"
           "interval\":200},\"net\":{\"batch_size\":4},"
-          "\"resume\":false},"
+          "\"resume\":false,\"mirror\":true,\"rotate\":true,\"crop_size\":224,"
+          "\"cutout\":0.5}"
+          ","
           "\"input\":{\"seed\":12345,\"db\":true,\"shuffle\":true},"
           "\"output\":{\"measure\":[\"f1\",\"acc\"]}},\"data\":[\""
         + resnet50_train_data + "\",\"" + resnet50_test_data + "\"]}";

Original file line number	Diff line number	Diff line change
`@@ -101,6 +101,7 @@ if (USE_TORCH)`
`101`	`101`	`backends/torch/torchmodule.cc`
`102`	`102`	`backends/torch/torchutils.cc`
`103`	`103`	`backends/torch/optim/ranger.cc`
	`104`	`+ backends/torch/torchdataaug.cc`
`104`	`105`	`)`
`105`	`106`	`endif()`
`106`	`107`
Original file line number	Diff line number	Diff line change
`@@ -228,7 +228,7 @@ namespace dd`
`228`	`228`	`if (shouldLoad)`
`229`	`229`	`{`
`230`	`230`	`if (_db)`
`231`		`- _tilogger->info("Load from db");`
	`231`	`+ _tilogger->info("Preparation for training from db");`
`232`	`232`	`// Get files paths`
`233`	`233`	`try`
`234`	`234`	`{`