Skip to content

[GSoC22] Data Augmentation Module in OpenCV (imgaug) #3335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 23 commits into
base: 4.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/aug/include/opencv2/aug.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#ifndef OPENCV_AUG_HPP
#define OPENCV_AUG_HPP

#include "opencv2/aug/transforms.hpp"
#include "opencv2/aug/transforms_det.hpp"
#include "opencv2/aug/functional.hpp"
#include "opencv2/aug/rng.hpp"

#endif
93 changes: 93 additions & 0 deletions modules/aug/include/opencv2/aug/functional.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#ifndef OPENCV_AUG_FUNCTIONAL_HPP
#define OPENCV_AUG_FUNCTIONAL_HPP
#include <opencv2/core.hpp>
#include <vector>

namespace cv {

// void blend(Mat& img1, Mat& img2, float ratio){
//
// }

/*
 * Scale every element of `img` in place by `brightness_factor`.
 *
 * img               - image with 1 or 3 channels; overwritten with the result.
 * brightness_factor - non-negative multiplier applied to every element.
 *
 * Raises via CV_Assert when brightness_factor < 0 and via CV_Error
 * (Error::BadNumChannels) when the image has neither 1 nor 3 channels.
 */
static inline void adjust_brightness(Mat& img, double brightness_factor){
    CV_Assert(brightness_factor >= 0);

    const int channels = img.channels();
    // `&&` (as in the sibling adjust_* functions) instead of the alternative token `and`,
    // which some compilers only accept in strict-conformance mode.
    if(channels != 1 && channels != 3){
        CV_Error(Error::BadNumChannels, "Only support images with 1 or 3 channels");
    }

    // Whole-matrix scaling; no per-pixel loop is needed.
    img = img * brightness_factor;
}

/*
 * Blend every channel of `img` with that channel's own mean value, in place:
 *     out = contrast_factor * channel + (1 - contrast_factor) * mean(channel)
 *
 * img             - image with 1 or 3 channels; overwritten with the result.
 * contrast_factor - non-negative blend weight.
 *
 * Raises via CV_Assert when contrast_factor < 0 and via CV_Error
 * (Error::BadNumChannels) when the image has neither 1 nor 3 channels.
 */
static inline void adjust_contrast(Mat& img, double contrast_factor){
    CV_Assert(contrast_factor >= 0);

    const int num_channels = img.channels();
    if(num_channels != 1 && num_channels != 3){
        CV_Error(Error::BadNumChannels, "Only support images with 1 or 3 channels");
    }

    // std::vector instead of `Mat channels[num_channels]`: an array whose size is only
    // known at run time is a variable-length array, which is not standard C++.
    std::vector<Mat> channels;
    split(img, channels);

    for(Mat& channel : channels){
        const Scalar avg = mean(channel);
        // Constant plane holding the channel mean, in the channel's own element type.
        const Mat avg_mat(channel.size(), channel.type(), avg);
        // Evaluate into a fresh Mat first so the source plane is never read and written at once.
        Mat blended = contrast_factor * channel + (1 - contrast_factor) * avg_mat;
        channel = blended;
    }
    merge(channels, img);
}

/*
 * Blend a 3-channel image with its own grayscale version, in place:
 *     out = saturation_factor * img + (1 - saturation_factor) * gray
 * Single-channel images are returned untouched.
 *
 * img               - image with 1 or 3 channels; 3-channel data is converted with
 *                     COLOR_BGR2GRAY, i.e. it is treated as BGR.
 * saturation_factor - non-negative blend weight.
 *
 * Raises via CV_Assert when saturation_factor < 0 and via CV_Error
 * (Error::BadNumChannels) when the image has neither 1 nor 3 channels.
 * Uses cvtColor, which is declared in <opencv2/imgproc.hpp>.
 */
static inline void adjust_saturation(Mat& img, double saturation_factor){
    CV_Assert(saturation_factor >= 0);

    const int num_channels = img.channels();
    if(num_channels != 1 && num_channels != 3){
        CV_Error(Error::BadNumChannels, "Only support images with 1 or 3 channels");
    }
    if(num_channels == 1) return;

    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);

    // Replicate the gray plane into three channels so it can be blended with `img`.
    // A separate destination keeps merge() from writing into one of its own inputs.
    const std::vector<Mat> gray_planes(3, gray);
    Mat gray_bgr;
    merge(gray_planes, gray_bgr);

    img = saturation_factor * img + (1 - saturation_factor) * gray_bgr;
}

/*
 * Rotate the hue of a 3-channel 8-bit image in place.
 * The image is converted BGR -> HSV, every hue value is shifted by
 * round(hue_factor * 180) with wrap-around, and the result is converted back to BGR.
 * Single-channel images are returned untouched.
 *
 * img        - image with 1 or 3 channels; 3-channel data must be 8-bit and is treated as BGR.
 * hue_factor - non-negative fraction of a full hue turn (1.0 == one full turn).
 *
 * Raises via CV_Assert when hue_factor < 0 or when a 3-channel image is not 8-bit, and via
 * CV_Error (Error::BadNumChannels) when the image has neither 1 nor 3 channels.
 * Uses cvtColor, which is declared in <opencv2/imgproc.hpp>.
 */
static inline void adjust_hue(Mat& img, double hue_factor) {
    // FIXME: the range of hue_factor needs to be modified
    CV_Assert(hue_factor >= 0);

    const int num_channels = img.channels();
    if (num_channels != 1 && num_channels != 3) {
        CV_Error(Error::BadNumChannels, "Only support images with 1 or 3 channels");
    }

    if (num_channels == 1) return;

    // The per-pixel access below goes through Vec3b, which is only valid for 8-bit data.
    CV_Assert(img.depth() == CV_8U);

    // For 8-bit images OpenCV stores hue as H/2, so valid values are 0..179 and the period is 180.
    const int hue_period = 180;
    // Reduce the shift first so that factors above 1.0 cannot push the sum past the uchar range.
    const int hue_shift = saturate_cast<int>(hue_factor * hue_period) % hue_period;

    Mat hsv;
    cvtColor(img, hsv, COLOR_BGR2HSV);
    for (int j = 0; j < hsv.rows; j++){
        Vec3b* row = hsv.ptr<Vec3b>(j);
        for (int i = 0; i < hsv.cols; i++){
            // Modulo wrap: a sum of exactly 180 maps to 0 instead of the out-of-range value 180.
            row[i][0] = static_cast<uchar>((row[i][0] + hue_shift) % hue_period);
        }
    }
    cvtColor(hsv, img, COLOR_HSV2BGR);
}
};

#endif
18 changes: 18 additions & 0 deletions modules/aug/include/opencv2/aug/rng.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//
// Created by Chuyang Zhao on 2022/8/17.
//

#ifndef OPENCV_AUG_RNG_HPP
#define OPENCV_AUG_RNG_HPP

// NOTE(review): this header uses uint64, cv::RNG and CV_EXPORTS_W but contains no #include of its
// own, so it only compiles when the includer has already pulled in <opencv2/core.hpp> — confirm
// whether it should include that header itself.
namespace cv{
namespace imgaug{
// Module-wide random-number state. Both variables are only declared here (extern); their
// definitions and initial values live in a source file that is not visible from this header.
// NOTE(review): neither declaration carries CV_EXPORTS — confirm whether they are meant to be
// reachable from outside the library.
extern uint64 state;
extern cv::RNG rng;

// Exported (and binding-wrapped) setter taking a 64-bit seed. Presumably it re-seeds `rng`
// and/or updates `state` — verify against the definition.
CV_EXPORTS_W void setSeed(uint64 seed);
}
}


#endif //OPENCV_AUG_RNG_HPP
204 changes: 204 additions & 0 deletions modules/aug/include/opencv2/aug/transforms.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#ifndef OPENCV_AUG_TRANSFORMS_HPP
#define OPENCV_AUG_TRANSFORMS_HPP

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <vector>

namespace cv{
namespace imgaug{
/*
* Base class for all augmentation classes
*/
// Abstract interface: a transform is anything that implements call(src, dst).
class CV_EXPORTS_W Transform{
public:
// Pure-virtual entry point overridden by every concrete transform declared in this header.
// Const-qualified, so implementations cannot modify the transform's own members.
CV_WRAP virtual void call(InputArray src, OutputArray dst) const = 0;
// Virtual so that derived transforms are destroyed correctly through a base pointer.
CV_WRAP virtual ~Transform() = default;
};

// Container for a list of Ptr<Transform>. Note that Compose does not itself derive from
// Transform. call() is defined out of line; presumably it applies the stored transforms in
// order — confirm in the implementation.
class CV_EXPORTS_W Compose{
public:
// NOTE(review): the vector is taken by non-const lvalue reference, so a temporary or a const
// vector cannot be passed — confirm whether a const reference was intended.
CV_WRAP explicit Compose(std::vector<Ptr<Transform> >& transforms);
CV_WRAP void call(InputArray src, OutputArray dst) const;

// Public list of transforms held by this object.
std::vector<Ptr<Transform> > transforms;
};

// Free-function form of the crop augmentation (declaration only; defined elsewhere).
// Defaults: padding = Vec4i() (all zeros), pad_if_need = false, fill = 0, BORDER_CONSTANT.
// NOTE(review): the meaning/order of the four padding values is not visible here — confirm
// against the implementation.
CV_EXPORTS_W void randomCrop(InputArray src, OutputArray dst, const Size& sz, const Vec4i& padding=Vec4i() , bool pad_if_need=false, int fill=0, int padding_mode=BORDER_CONSTANT);

// CV_EXPORTS_W void randomCropV1(InputOutputArray src, const Size& sz, const Vec4i& padding=Vec4i() , bool pad_if_need=false, int fill=0, int padding_mode=BORDER_CONSTANT);

// Free-function form of the flip augmentation (declaration only; defined elsewhere).
// Defaults: flipCode = 0, p = 0.5. Presumably flipCode follows cv::flip and p is the
// probability of flipping — confirm against the implementation.
CV_EXPORTS_W void randomFlip(InputArray src, OutputArray dst, int flipCode=0, double p=0.5);

// Transform with the same parameter set as the free function randomCrop().
// Only declarations are visible here; call() is defined out of line.
class CV_EXPORTS_W RandomCrop: public Transform{
public:
// Defaults: padding = (0,0,0,0), pad_if_need = false, fill = 0, padding_mode = BORDER_CONSTANT.
CV_WRAP explicit RandomCrop(const Size& sz, const Vec4i& padding=Vec4i(0,0,0,0), bool pad_if_need=false, int fill=0, int padding_mode=BORDER_CONSTANT);
CV_WRAP ~RandomCrop() override = default;
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
Size sz;
Vec4i padding;
bool pad_if_need;
int fill;
int padding_mode;
};

// Transform with the same parameter set as the free function randomFlip().
// Only declarations are visible here; call() is defined out of line.
class CV_EXPORTS_W RandomFlip: public Transform{
public:
// Defaults: flipCode = 0, p = 0.5.
CV_WRAP explicit RandomFlip(int flipCode=0, double p=0.5);
CV_WRAP ~RandomFlip() override = default;
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably the flip axis code (as in cv::flip) and the probability of
// applying the flip — confirm in the .cpp.
int flipCode;
double p;
};

/*
* All interpolation types: https://docs.opencv.org/3.4/da/d54/group__imgproc__transform.html#gga5bb5a1fea74ea38e1a5445ca803ff121ac97d8e4880d8b5d509e96825c7522deb
*/
// Transform parameterised by a target size and an interpolation flag.
// Only declarations are visible here; call() is defined out of line.
class CV_EXPORTS_W Resize: public Transform{
public:
// interpolation defaults to INTER_LINEAR.
CV_WRAP explicit Resize(const Size& sz, int interpolation=INTER_LINEAR);
CV_WRAP ~Resize() override = default;
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
Size sz;
int interpolation;
};

// Free-function form (declaration only; defined elsewhere). Presumably extracts a `size`
// region centred in `src` — confirm against the implementation.
CV_EXPORTS_W void centerCrop(InputArray src, OutputArray dst, const Size& size);

// Transform with the same parameter as centerCrop(); call() is defined out of line.
class CV_EXPORTS_W CenterCrop : public Transform {
public:
CV_WRAP explicit CenterCrop(const Size& size);
CV_WRAP ~CenterCrop() override = default;
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameter; presumably the requested output size — confirm in the .cpp.
Size size;
};

// Transform parameterised by four padding amounts, a fill value and a border mode.
// Only declarations are visible here; call() is defined out of line.
// NOTE(review): the meaning/order of the four padding values is not visible in this header —
// confirm against the implementation.
class CV_EXPORTS_W Pad : public Transform{
public:
    // The fill parameter is now named (it was anonymous), matching the member it initialises.
    // Defaults: fill = Scalar() (all zeros), padding_mode = BORDER_CONSTANT.
    CV_WRAP explicit Pad(const Vec4i& padding, const Scalar& fill = Scalar(), int padding_mode = BORDER_CONSTANT);
    CV_WRAP void call(InputArray src, OutputArray dst) const override;

    Vec4i padding;
    // Non-const like the members of every other transform here, so Pad stays copy-assignable.
    Scalar fill;
    int padding_mode;
};

// Free-function form (declaration only; defined elsewhere).
// Defaults: scale = (0.08, 1.0), ratio = (3/4, 4/3), interpolation = INTER_LINEAR.
// Presumably scale is a (min, max) area fraction and ratio a (min, max) aspect ratio for the
// sampled crop, which is then resized to `size` — confirm against the implementation.
// Exported with CV_EXPORTS (not CV_EXPORTS_W), unlike randomCrop/randomFlip above.
CV_EXPORTS void randomResizedCrop(InputArray src, OutputArray dst, const Size& size, const Vec2d& scale = Vec2d(0.08, 1.0), const Vec2d& ratio = Vec2d(3.0 / 4.0, 4.0 / 3.0), int interpolation = INTER_LINEAR);

// Transform with the same parameters and defaults as randomResizedCrop(); call() is defined out of line.
class CV_EXPORTS_W RandomResizedCrop : public Transform {
public:
CV_WRAP explicit RandomResizedCrop(const Size& size, const Vec2d& scale = Vec2d(0.08, 1.0), const Vec2d& ratio = Vec2d(3.0 / 4.0, 4.0 / 3.0), int interpolation = INTER_LINEAR);
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
Size size;
Vec2d scale;
Vec2d ratio;
int interpolation;
};

// Free-function form (declaration only; defined elsewhere). Every range defaults to Vec2d(),
// i.e. (0, 0). Presumably each Vec2d is a (min, max) interval from which the corresponding
// factor is sampled — confirm against the implementation, including what (0, 0) means.
// Exported with CV_EXPORTS (not CV_EXPORTS_W).
CV_EXPORTS void colorJitter(InputArray src, OutputArray dst, const Vec2d& brightness=Vec2d(), const Vec2d& contrast=Vec2d(), const Vec2d& saturation=Vec2d(), const Vec2d& hue=Vec2d());

// Transform with the same parameters and defaults as colorJitter(); call() is defined out of line.
class CV_EXPORTS_W ColorJitter : public Transform {
public:
CV_WRAP explicit ColorJitter(const Vec2d& brightness=Vec2d(), const Vec2d& contrast=Vec2d(), const Vec2d& saturation=Vec2d(), const Vec2d& hue=Vec2d());
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
Vec2d brightness;
Vec2d contrast;
Vec2d saturation;
Vec2d hue;
};

// Free-function form (declaration only; defined elsewhere).
// Defaults: interpolation = INTER_NEAREST, expand = false, center = Point2f() i.e. (0, 0), fill = 0.
// Presumably degrees is a (min, max) angle interval — confirm against the implementation.
// NOTE(review): the default center is (0, 0); whether that is treated as "use the image centre"
// cannot be determined from this header.
// Exported with CV_EXPORTS (not CV_EXPORTS_W).
CV_EXPORTS void randomRotation(InputArray src, OutputArray dst, const Vec2d& degrees, int interpolation=INTER_NEAREST, bool expand=false, const Point2f& center=Point2f(), int fill=0);

// Transform with the same parameters and defaults as randomRotation(); call() is defined out of line.
class CV_EXPORTS_W RandomRotation : public Transform {
public:
CV_WRAP explicit RandomRotation(const Vec2d& degrees, int interpolation=INTER_NEAREST, bool expand=false, const Point2f& center=Point2f(), int fill=0);
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
Vec2d degrees;
int interpolation;
bool expand;
Point2f center;
int fill;
};

// Transform parameterised by a channel count; call() is defined out of line.
// Presumably converts the input to grayscale with `num_channels` output channels — confirm
// against the implementation, including which values are accepted.
class CV_EXPORTS_W GrayScale : public Transform {
public:
// num_channels defaults to 1.
CV_WRAP explicit GrayScale(int num_channels=1);
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameter; presumably a copy of the constructor argument — confirm in the .cpp.
int num_channels;
};

// Free-function form (declaration only; defined elsewhere). p defaults to 0.1; presumably it is
// the probability of converting to grayscale — confirm against the implementation.
// Exported with CV_EXPORTS (not CV_EXPORTS_W).
CV_EXPORTS void randomGrayScale(InputArray src, OutputArray dst, double p=0.1);

// Transform with the same parameter and default as randomGrayScale(); call() is defined out of line.
class CV_EXPORTS_W RandomGrayScale : public Transform {
public:
CV_WRAP explicit RandomGrayScale(double p=0.1);
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameter; presumably a copy of the constructor argument — confirm in the .cpp.
double p;
};

// Free-function form (declaration only; defined elsewhere).
// Defaults: p = 0.5, scale = (0.02, 0.33), ratio = (0.3, 0.33), value = Scalar(0, 100, 100), inplace = false.
// Presumably p is the probability of erasing, scale/ratio are (min, max) intervals for the
// erased region's area fraction and aspect ratio, and value is the fill — confirm against the
// implementation.
// NOTE(review): the ratio default (0.3, 0.33) is a very narrow interval; torchvision's
// RandomErasing uses (0.3, 3.3) — check whether 0.33 is a typo.
// Exported with CV_EXPORTS (not CV_EXPORTS_W).
CV_EXPORTS void randomErasing(InputArray src, OutputArray dst, double p=0.5, const Vec2d& scale=Vec2d(0.02, 0.33), const Vec2d& ratio=Vec2d(0.3, 0.33), const Scalar& value=Scalar(0, 100, 100), bool inplace=false);

// Transform with the same parameters and defaults as randomErasing(); call() is defined out of line.
class CV_EXPORTS_W RandomErasing : public Transform {
public:
CV_WRAP explicit RandomErasing(double p=0.5, const Vec2d& scale=Vec2d(0.02, 0.33), const Vec2d& ratio=Vec2d(0.3, 0.33), const Scalar& value=Scalar(0, 100, 100), bool inplace=false);
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
double p;
Vec2d scale;
Vec2d ratio;
Scalar value;
bool inplace;
};

// NOTE: After normalizing, data are represented as 32-bit float in range(0,1)
// Transform parameterised by a per-channel mean and std; call() is defined out of line.
// Presumably computes (x - mean) / std per channel — confirm against the implementation.
class CV_EXPORTS_W Normalize : public Transform {
public:
// Defaults: mean = (0,0,0,0), std = (1,1,1,1).
CV_WRAP explicit Normalize(const Scalar& mean=Scalar(0,0,0,0), const Scalar& std=Scalar(1,1,1,1));
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
// Note that the member name `std` shadows the namespace name inside this class's scope.
Scalar mean;
Scalar std;
};

// Free-function form (declaration only; defined elsewhere). sigma defaults to (0.1, 2.0);
// presumably a (min, max) interval from which the blur sigma is sampled — confirm against the
// implementation.
// The name differs from cv::GaussianBlur only in the case of its first letter.
// Exported with CV_EXPORTS (not CV_EXPORTS_W).
CV_EXPORTS void gaussianBlur(InputArray src, OutputArray dst, const Size& kernel_size, const Vec2f& sigma=Vec2f(0.1, 2.0));

// Transform with the same parameters and default as gaussianBlur(); call() is defined out of line.
class CV_EXPORTS_W GaussianBlurAug : public Transform {
public:
CV_WRAP explicit GaussianBlurAug(const Size& kernel_size, const Vec2f& sigma=Vec2f(0.1, 2.0));
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
Size kernel_size;
Vec2f sigma;
};

// Free-function form (declaration only; defined elsewhere).
// Defaults: degrees = (0, 0), translations = (0, 0), scales = (1, 1), shears = (0, 0, 0, 0),
// interpolation = INTER_NEAREST, fill = Scalar(), center = (-1, -1).
// Presumably the Vec2f/Vec4f arguments are sampling intervals and center = (-1, -1) is a
// sentinel for "use the image centre" — confirm against the implementation.
// Exported with CV_EXPORTS (not CV_EXPORTS_W).
CV_EXPORTS void randomAffine(InputArray src, OutputArray dst, const Vec2f& degrees=Vec2f(0., 0.), const Vec2f& translations=Vec2f(0., 0.), const Vec2f& scales=Vec2f(1., 1.), const Vec4f& shears=Vec4f(0., 0., 0., 0.), int interpolation=INTER_NEAREST, const Scalar& fill=Scalar(), const Point2i& center=Point2i(-1, -1));

// Transform with the same parameters and defaults as randomAffine(); call() is defined out of line.
class CV_EXPORTS_W RandomAffine: public Transform{
public:
CV_WRAP explicit RandomAffine(const Vec2f& degrees=Vec2f(0., 0.), const Vec2f& translations=Vec2f(0., 0.), const Vec2f& scales=Vec2f(1., 1.), const Vec4f& shears=Vec4f(0., 0., 0., 0.), int interpolation=INTER_NEAREST, const Scalar& fill=Scalar(), const Point2i& center=Point2i(-1, -1));
CV_WRAP void call(InputArray src, OutputArray dst) const override;

// Public parameters; presumably copies of the same-named constructor arguments — confirm in the .cpp.
Vec2f degrees;
Vec2f translations;
Vec2f scales;
Vec4f shears;
int interpolation;
Scalar fill;
Point2i center;

};
}


}

#endif
69 changes: 69 additions & 0 deletions modules/aug/include/opencv2/aug/transforms_det.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//
// Created by Chuyang Zhao on 2022/8/9.
//

#ifndef OPENCV_TRANSFORMS_DET_HPP
#define OPENCV_TRANSFORMS_DET_HPP

namespace cv{
namespace imgaug{
namespace det{
// Abstract interface for detection-style transforms: call() receives an image pair plus a
// list of rectangles that the transform may modify (the vector is passed by non-const
// reference and tagged CV_IN_OUT).
// NOTE(review): this header uses InputArray, std::vector, cv::Rect and INTER_NEAREST without
// including anything itself; it relies on the includer having pulled in the OpenCV headers first.
class CV_EXPORTS_W Transform{
public:
// Pure-virtual entry point to be overridden by concrete detection transforms.
CV_WRAP virtual void call(InputArray src, OutputArray dst, CV_IN_OUT std::vector<cv::Rect>& target) const = 0;
// Virtual so that derived transforms are destroyed correctly through a base pointer.
CV_WRAP virtual ~Transform() = default;
};

// Container for a list of Ptr<det::Transform>. It does not itself derive from det::Transform.
// call() is defined out of line; presumably it applies the stored transforms in order to both
// the image and the rectangles — confirm in the implementation.
class CV_EXPORTS_W Compose{
public:
// NOTE(review): the vector is taken by non-const lvalue reference, so a temporary or a const
// vector cannot be passed — confirm whether a const reference was intended.
CV_WRAP explicit Compose(std::vector<cv::Ptr<cv::imgaug::det::Transform> >& transforms);
CV_WRAP void call(InputArray src, OutputArray dst, CV_IN_OUT std::vector<cv::Rect>& target) const;

// Public list of transforms held by this object.
std::vector<cv::Ptr<cv::imgaug::det::Transform> > transforms;
};

class CV_EXPORTS_W RandomFlip: cv::imgaug::det::Transform{
public:
CV_WRAP explicit RandomFlip(int flipCode=0, float p=0.5);
CV_WRAP void call(InputArray src, OutputArray dst, CV_IN_OUT std::vector<cv::Rect>& target) const;
void flipBoundingBox(std::vector<cv::Rect>& target, const Size& size) const;

int flipCode;
float p;
};

// class CV_EXPORTS_W RandomCrop: cv::det::Transform{
// public:
// CV_WRAP explicit RandomCrop(const Size& sz, const Vec4i& padding=Vec4i() , bool pad_if_need=false, const Scalar& fill=Scalar(), int padding_mode=BORDER_CONSTANT);
// CV_WRAP void call(InputArray src, OutputArray dst, std::vector<cv::Rect>& target) const;
//
// const Size sz;
// Vec4i padding;
// bool pad_if_need;
// Scalar fill;
// int padding_mode;
// };

class CV_EXPORTS_W Resize: cv::imgaug::det::Transform{
public:
CV_WRAP explicit Resize(const Size& size, int interpolation=INTER_NEAREST);
CV_WRAP void call(InputArray src, OutputArray dst, CV_IN_OUT std::vector<cv::Rect>& target) const;
void resizeBoundingBox(std::vector<cv::Rect>& target, const Size& imgSize) const;

const Size size;
int interpolation;
};

class CV_EXPORTS_W Convert: cv::imgaug::det::Transform{
public:
CV_WRAP explicit Convert(int code);
CV_WRAP void call(InputArray src, OutputArray dst, CV_IN_OUT std::vector<cv::Rect>& target) const;

int code;
};
}
}

}

#endif //OPENCV_TRANSFORMS_DET_HPP
Loading