From c060c543c731abb18d82df067ef30806e9edb40a Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:47:52 +0800 Subject: [PATCH 01/36] fix: each constructor now has to create its shared_ptr individually, since the shared_ptr is no longer initialized before the constructor is called --- include/core/tensor.hpp | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/include/core/tensor.hpp b/include/core/tensor.hpp index 8310ced..846f3d3 100644 --- a/include/core/tensor.hpp +++ b/include/core/tensor.hpp @@ -8,10 +8,11 @@ using namespace std; template class Tensor { private: - shared_ptr> data_ = make_shared>(); // data is stored as a 1D vector // shared_ptr is used to avoid copying data + shared_ptr> data_ = nullptr; // data is stored as a 1D vector // shared_ptr is used to avoid copying data vector shapes_; // store the dimensions of the tensor vector strides_; // store the strides of the tensor size_t offset_ = 0; // offset for slicing + mutable int64_t size_ = -1; // it can be changed by const member functions (in size() function) // Helper function to calculate the index in the 1D vector for a given set of indices expressed in the form of N-D vector size_t calculate_idx(const vector& idxs) const { @@ -210,7 +211,7 @@ class Tensor { public: Tensor() = default; - + // Helper to recursively flatten nested vectors and compute shapes template void flatten_vector(const std::vector& vec, size_t depth = 0) { @@ -245,8 +246,9 @@ class Tensor { } else { // Ensure leaf elements match the Tensor's data type // static_assert(std::is_same_v, "Element type must match Tensor type"); + this->data_->reserve(this->data_->size() + vec.size()); for (const auto& elem : vec) { - this->data_->push_back(static_cast(elem)); + this->data_->emplace_back(static_cast(elem)); } } } @@ -254,6 +256,7 @@ class Tensor { // Constructor for nested vectors template Tensor(const std::vector& input) { + this->data_ = make_shared>(); flatten_vector(input); this->calculate_strides(); } @@ -308,6 +311,7 @@ class Tensor { this->shapes_ = vector { n, m }; + this->data_ = make_shared>(); this->data_->reserve(n * m); // Optimize memory allocation for (const initializer_list& row : data_2d) { @@ -322,6 +326,7 @@ class Tensor { this->shapes_ = vector { n, m, l }; + this->data_ = make_shared>(); this->data_->reserve(n * m * l); // Optimize memory allocation for (const initializer_list>& matrix : data_3d) { @@ -338,6 +343,7 @@ class Tensor { this->shapes_ = vector { n, m, l, k }; + this->data_ = make_shared>(); this->data_->reserve(n * m * l * k); // Optimize memory allocation for (const initializer_list>>& tensor : data_4d) { @@ -351,14 +357,14 @@ class Tensor { } // certin value constructor - Tensor(const vector& shape, const T& value) { + explicit Tensor(const vector& shape, const T& value) { this->shapes_ = shape; size_t size = 1; for (const size_t& dim : shape) { size *= dim; } - this->data_->resize(size, value); + this->data_ = make_shared>(size, value); this->calculate_strides(); } @@ -542,8 +548,8 @@ class Tensor { swap(result.shapes_[dim0], result.shapes_[dim1]); swap(result.strides_[dim0], result.strides_[dim1]); - cout << "result.shapes_: " << result.shapes_[0] << " " << result.shapes_[1] << endl; - cout << "result.strides_: " << result.strides_[0] << " " << result.strides_[1] << endl; + // cout << "result.shapes_: " << result.shapes_[0] << " " << result.shapes_[1] << endl; + // cout << "result.strides_: " << result.strides_[0] << " " << result.strides_[1] << endl; return result; }
@@ -720,11 +726,20 @@ class Tensor { } const size_t size() const { - size_t n = 1; + if (this->offset_ == 0) { + return this->data_->size(); + } + + if (this->size_ != -1) { + return this->size_; + } + + this->size_ = 1; for (const size_t& s : this->shapes_) { - n *= s; + this->size_ *= s; } - return n; + + return this->size_; } /// @brief Print the tensor to console. From 997b590829a5ccdd06a232317df48149f14e90b5 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:48:32 +0800 Subject: [PATCH 02/36] refactor: change the weight name --- include/modules/layers/linear.hpp | 18 ++++++------- src/modules/layers/linear.cpp | 45 +++++++++++++++++-------------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/include/modules/layers/linear.hpp b/include/modules/layers/linear.hpp index 5265488..bd77598 100644 --- a/include/modules/layers/linear.hpp +++ b/include/modules/layers/linear.hpp @@ -14,21 +14,21 @@ class Linear : public Module{ void randomizeParams(); // setters - inline void set_weights(const Tensor<>& desiredWeights) { this->weights_ = desiredWeights; }; - inline void set_biases(const Tensor<>& desiredBiases) { this->biases_ = desiredBiases; } + inline void set_weights(const Tensor<>& target_weight) { this->weight_ = target_weight; }; + inline void set_biases(const Tensor<>& target_bias) { this->bias_ = target_bias; } // getters - inline const Tensor<>& getWeights() const { return this->weights_; } - inline const Tensor<>& getBiases() const { return this->biases_; } + inline const Tensor<>& getWeights() const { return this->weight_; } + inline const Tensor<>& getBiases() const { return this->bias_; } private: size_t in_features_; size_t out_features_; - bool bias_; - Tensor<> weights_; - Tensor<> biases_; - Tensor<> grad_weights_; - Tensor<> grad_biases_; + bool use_bias_; + Tensor<> weight_; + Tensor<> bias_; + Tensor<> grad_weight_; + Tensor<> grad_bias_; }; } \ No newline at end of file diff --git a/src/modules/layers/linear.cpp b/src/modules/layers/linear.cpp index c1b01b0..e033725 100644 --- a/src/modules/layers/linear.cpp +++ b/src/modules/layers/linear.cpp @@ -3,32 +3,33 @@ #include "linear.hpp" using namespace nn; -Linear::Linear(size_t in_features, size_t out_features, bool bias) : in_features_(in_features), out_features_(out_features), bias_(bias) { - this->weights_ = Tensor<>({in_features, out_features}, 0.0f); +Linear::Linear(size_t in_features, size_t out_features, bool bias) : in_features_(in_features), out_features_(out_features), use_bias_(bias) { + this->weight_ = Tensor<>({in_features, out_features}, 0.0f); - if (bias) { - this->biases_ = Tensor<>({out_features, 1}, 0.0f); + if (this->use_bias_) { + this->bias_ = Tensor<>({out_features, 1}, 0.0f); } else { - this->biases_ = Tensor<>(); + this->bias_ = Tensor<>(); } // randomize the weights. The bias is originally 0.
this->randomizeParams(); - this->grad_weights_ = Tensor<>({in_features, out_features}, 0.0f);; - this->grad_biases_ = Tensor<>({out_features, 1}, 0.0f); + this->grad_weight_ = Tensor<>({in_features, out_features}, 0.0f);; + this->grad_bias_ = Tensor<>({out_features, 1}, 0.0f); - this->input_cache_ = Tensor<>(); + cout << "Linear layer initialized with in_features = " << in_features << " and out_features = " << out_features << endl; + cout << &this->input_cache_ << endl; } Tensor<> Linear::forward(const Tensor<>& input) { this->input_cache_ = input; size_t batchSize = input.shapes()[0]; - const Tensor<>& XW = input.matmul(this->weights_); + const Tensor<>& XW = input.matmul(this->weight_); - if (!this->bias_) { + if (!this->use_bias_) { return XW; } @@ -36,7 +37,7 @@ Tensor<> Linear::forward(const Tensor<>& input) { for (size_t i = 0; i < batchSize; i++) { for (size_t j = 0; j < this->out_features_; j++) { - biases_repeated[i, j] = this->biases_[j, 0]; + biases_repeated[i, j] = this->bias_[j, 0]; } } @@ -47,24 +48,24 @@ Tensor<> Linear::backward(const Tensor<>& grad_output) { // dL/dY = grad_output // dL/dW = X^T * dL/dY - this->grad_weights_ = this->input_cache_.transpose().matmul(grad_output); + this->grad_weight_ = this->input_cache_.transpose().matmul(grad_output); // cout << endl << "dL/dW: " << endl; - // this->grad_weights_.print(); + // this->grad_weight_.print(); // cout << endl; // dL/dX = dL/dY * W^T - Tensor<> grad_input = grad_output.matmul(this->weights_.transpose()); + Tensor<> grad_input = grad_output.matmul(this->weight_.transpose()); /* dL/db = dL/dY^T * 1_B (1_B is a vector of ones of size batchSize) dL/db = dL/dY.sum(axis=0) */ - if (this->bias_) - this->grad_biases_ = grad_output.transpose().matmul(Tensor<>({grad_output.shapes()[0], 1}, 1.0f)); + if (this->use_bias_) + this->grad_bias_ = grad_output.transpose().matmul(Tensor<>({grad_output.shapes()[0], 1}, 1.0f)); // cout << endl << "dL/db: " << endl; - // this->grad_biases_.print(); + // this->grad_bias_.print(); // cout << endl; return grad_input; @@ -72,8 +73,8 @@ Tensor<> Linear::backward(const Tensor<>& grad_output) { void Linear::update_params(const float lr) { - this->weights_ -= this->grad_weights_ * lr; - this->biases_ -= this->grad_biases_ * lr; + this->weight_ -= this->grad_weight_ * lr; + this->bias_ -= this->grad_bias_ * lr; return; } @@ -87,10 +88,14 @@ void Linear::randomizeParams() { mt19937 gen(rd()); uniform_real_distribution dis(-limit, limit); + cout << "Starting randomization" << endl; + // Xavier initialization for (size_t i = 0; i < this->in_features_; i++) { for (size_t j = 0; j < this->out_features_; j++) { - this->weights_[i, j] = dis(gen); + this->weight_[i, j] = dis(gen); } } + + cout << "Finished randomization" << endl; } \ No newline at end of file From ca268ac770f57f3f1f83afe060831a6f045b3b43 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:49:08 +0800 Subject: [PATCH 03/36] refactor: remove grad_output --- include/core/loss.hpp | 2 +- src/modules/losses/cross_entropy.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/core/loss.hpp b/include/core/loss.hpp index d35d8de..4f3eb14 100644 --- a/include/core/loss.hpp +++ b/include/core/loss.hpp @@ -4,6 +4,7 @@ namespace nn { class Loss { public: + Loss() = default; virtual ~Loss() = default; virtual double forward(const Tensor<>& Y_hat, const Tensor<>& Y) = 0; virtual Tensor<> backward() = 0; protected: - Tensor<> grad_output_; Tensor<> Y_cache_; Tensor<>
Y_hat_cache_; }; diff --git a/src/modules/losses/cross_entropy.cpp b/src/modules/losses/cross_entropy.cpp index 002b585..a75f61a 100644 --- a/src/modules/losses/cross_entropy.cpp +++ b/src/modules/losses/cross_entropy.cpp @@ -4,7 +4,8 @@ using namespace nn; CrossEntropyLoss::CrossEntropyLoss() { - this->softmax_ = Softmax(); + cout << "Starting CrossEntropyLoss" << endl; + cout << "CrossEntropyLoss initialized" << endl; } double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { From ba12669be122ffc2bb75dcaa154b25607eea7b4b Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:49:17 +0800 Subject: [PATCH 04/36] refactor --- src/utils/tensor_utils.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils/tensor_utils.cpp b/src/utils/tensor_utils.cpp index 1669905..0baa8a6 100644 --- a/src/utils/tensor_utils.cpp +++ b/src/utils/tensor_utils.cpp @@ -55,7 +55,6 @@ vector apply_slice(const Slice& slice, size_t dim_size) { // cout << "start applying slice" << endl; for (size_t i = start; i < stop; i += step) { - // cout << "i: " << i << endl; indices.push_back(i); } return indices; From 387507b6b65ee529ee4c6c1514c0427cc97d80dd Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:49:50 +0800 Subject: [PATCH 05/36] temporarily remove the implementation --- src/modules/layers/conv2d.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/modules/layers/conv2d.cpp b/src/modules/layers/conv2d.cpp index d1d44d0..05224ee 100644 --- a/src/modules/layers/conv2d.cpp +++ b/src/modules/layers/conv2d.cpp @@ -2,11 +2,11 @@ using namespace nn; Conv2d::Conv2d(int in_channels, int out_channels, int kernel_size, int stride, int padding, int dilation, bool bias) { - this->in_channels_ = in_channels; - this->out_channels_ = out_channels; - this->kernel_size_ = kernel_size; - this->stride_ = stride; - this->padding_ = padding; - this->dilation_ = dilation; - this->bias_ = bias; + // this->in_channels_ = in_channels; + // this->out_channels_ = out_channels; + // this->kernel_size_ = kernel_size; + // this->stride_ = stride; + // this->padding_ = padding; + // this->dilation_ = dilation; + // this->bias_ = bias; } From 342fb37ea10304360c1137ec1ddc1c0eddce721a Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:50:07 +0800 Subject: [PATCH 06/36] add comment to debug --- examples/main.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/main.cpp b/examples/main.cpp index 1c62a61..616e667 100644 --- a/examples/main.cpp +++ b/examples/main.cpp @@ -29,14 +29,20 @@ int main() { // Initialize the model MLP model = MLP({784, 128, 64, 10}, DROPOUT_P); + cout << "Finished model initialization" << endl; + // Define the loss function CrossEntropyLoss criterion = CrossEntropyLoss(); + cout << "Finished loss initialization" << endl; + double loss = 0.0; double acc = 0.0; vector loss_list; vector accuracy_list; + cout << "Training started..."
<< endl; + // // Train the model // Example of iterating through all batches for (size_t e = 0; e < EPOCH; e++) { From afaf692940117991ae1cfbb7122f157555f80570 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:50:27 +0800 Subject: [PATCH 07/36] fix: dtype has to construct the shared_ptr --- include/utils/tensor_utils.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/utils/tensor_utils.hpp b/include/utils/tensor_utils.hpp index efa2b35..941231c 100644 --- a/include/utils/tensor_utils.hpp +++ b/include/utils/tensor_utils.hpp @@ -78,6 +78,8 @@ template Tensor dtype_impl(const Tensor& tensor) { Tensor result; result.shapes_ = tensor.shapes_; + + result.data_ = make_shared>(); result.data_->resize(tensor.data_->size()); std::transform(tensor.data_->begin(), tensor.data_->end(), result.data_->begin(), From 51c0c85e115005892dc2a390420eccc479d03ef9 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:51:02 +0800 Subject: [PATCH 08/36] fix: add default constructor --- include/core/module.hpp | 1 + include/modules/activations/softmax.hpp | 1 + src/modules/activations/softmax.cpp | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/include/core/module.hpp b/include/core/module.hpp index 986cf16..ea7b266 100644 --- a/include/core/module.hpp +++ b/include/core/module.hpp @@ -10,6 +10,7 @@ namespace nn { class Module { public: + Module() = default; /** * Virtual destructor for the Module class. */ diff --git a/include/modules/activations/softmax.hpp b/include/modules/activations/softmax.hpp index 96542cc..9447211 100644 --- a/include/modules/activations/softmax.hpp +++ b/include/modules/activations/softmax.hpp @@ -10,6 +10,7 @@ class Softmax : public Module { Tensor<> softmax_helper(const Tensor<>& input); vector softmax_helper(const vector& input); public: + Softmax(); Tensor<> forward(const Tensor<>& input); Tensor<> backward(const Tensor<>& grad_output); const Tensor<>& get_softmax_input_cache() const { return this->softmax_input_cache_; } diff --git a/src/modules/activations/softmax.cpp b/src/modules/activations/softmax.cpp index 2e2be8d..a7afdca 100644 --- a/src/modules/activations/softmax.cpp +++ b/src/modules/activations/softmax.cpp @@ -2,6 +2,11 @@ #include "softmax.hpp" using namespace nn; +Softmax::Softmax() { + cout << "Starting Softmax" << endl; + cout << "Softmax initialized" << endl; +} + Tensor<> Softmax::softmax_helper(const Tensor<>& input) { Tensor<> result = input.map([](double x) { return exp(x); }); double sum = result.sum(); From 564bf2562c0ab70e10b1a79c66b240c0b3241f97 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:51:31 +0800 Subject: [PATCH 09/36] fix: no dtype conversion in to_tensor to enhance efficiency --- src/datasets/mnist.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/datasets/mnist.cpp b/src/datasets/mnist.cpp index add5f07..bb8b178 100644 --- a/src/datasets/mnist.cpp +++ b/src/datasets/mnist.cpp @@ -114,9 +114,7 @@ bool MNIST::read_labels(const string& path) { tuple, Tensor<>> Batch::to_tensor() { Tensor<> data = this->batch_data; + Tensor<> labels = this->batch_labels; - Tensor labels_int = this->batch_labels; - Tensor<> labels = labels_int.dtype(); - return make_tuple(data, labels); } \ No newline at end of file From de22280b5796b852d0f448d3597c753fd54eaac0 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:51:50 +0800 Subject: [PATCH 10/36] refactor: change the source file sequence --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3
deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c09c2f..270c6af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,8 +31,9 @@ include_directories( # Add source files set(SOURCE_FILES - src/core/module.cpp src/core/tensor.cpp + src/utils/tensor_utils.cpp + src/core/module.cpp src/modules/layers/linear.cpp src/modules/layers/dropout.cpp src/modules/layers/conv2d.cpp @@ -40,7 +41,6 @@ set(SOURCE_FILES src/modules/activations/relu.cpp src/modules/activations/softmax.cpp src/modules/losses/cross_entropy.cpp - src/utils/tensor_utils.cpp src/datasets/mnist.cpp src/models/mlp.cpp src/metrics/accuracy.cpp @@ -51,7 +51,7 @@ set(SOURCE_FILES add_library(neuralnet ${SOURCE_FILES}) # Add the executable for the main example -add_executable(main examples/test_tensor.cpp) +add_executable(main examples/main.cpp) target_link_libraries(main neuralnet) # Only build tests if BUILD_TESTS is ON From 84bc534c62a209a5a99ea9696ecfcfcd5dfdeb52 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sat, 8 Feb 2025 00:52:16 +0800 Subject: [PATCH 11/36] fix: now kernel size, stride, padding, and dilation are tuple of int instead of int --- include/modules/layers/conv2d.hpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/include/modules/layers/conv2d.hpp b/include/modules/layers/conv2d.hpp index 2d68d66..66907a4 100644 --- a/include/modules/layers/conv2d.hpp +++ b/include/modules/layers/conv2d.hpp @@ -11,15 +11,14 @@ class Conv2d : public Module { virtual void update_params(const float lr) override; Tensor<> convolution(const Tensor<>& input, const Tensor<> filter); - Tensor<> full_convolution(const Tensor<>& input, const Tensor<> filter); private: - int in_channels_; - int out_channels_; - int kernel_size_; - int stride_; - int padding_; - int dilation_; + int64_t in_channels_; + int64_t out_channels_; + tuple kernel_size_; + tuple stride_; + tuple padding_; + tuple dilation_; bool bias_; Tensor<> weights_; Tensor<> biases_; From 55f4fd8b76189d67510aee23933f315fc170448c Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sun, 9 Feb 2025 17:02:41 +0800 Subject: [PATCH 12/36] feat: finish conv2d constructor and convolution implementation --- include/modules/layers/conv2d.hpp | 47 ++++++---- src/modules/layers/conv2d.cpp | 151 ++++++++++++++++++++++++++++-- 2 files changed, 174 insertions(+), 24 deletions(-) diff --git a/include/modules/layers/conv2d.hpp b/include/modules/layers/conv2d.hpp index 66907a4..af2965e 100644 --- a/include/modules/layers/conv2d.hpp +++ b/include/modules/layers/conv2d.hpp @@ -1,29 +1,44 @@ +#include #include "module.hpp" using namespace nn; -namespace nn { +using int2 = std::pair; +using var_pair = std::variant; -class Conv2d : public Module { +namespace nn +{ + + class Conv2d : public Module + { public: - Conv2d(int in_channels, int out_channels, int kernel_size, int stride = 1, int padding = 0, int dilation = 1, bool bias = true); - virtual Tensor<> forward(const Tensor<>& input) override; - virtual Tensor<> backward(const Tensor<>& grad_output) override; + Conv2d(int64_t in_channels, + int64_t out_channels, + var_pair kernel_size, + var_pair stride = 1, + var_pair padding = 0, + var_pair dilation = 1, + bool bias = true); + + virtual Tensor<> forward(const Tensor<> &input) override; + virtual Tensor<> backward(const Tensor<> &grad_output) override; virtual void update_params(const float lr) override; - Tensor<> convolution(const Tensor<>& input, const Tensor<> filter); - + Tensor<> convolution(const int2 &stride, const int2 &dilation, const vector 
&output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias); + + std::tuple calculate_output_shape(const Tensor<> &input); + private: int64_t in_channels_; int64_t out_channels_; - tuple kernel_size_; - tuple stride_; - tuple padding_; - tuple dilation_; - bool bias_; - Tensor<> weights_; - Tensor<> biases_; + int2 kernel_size_; + int2 stride_; + int2 padding_; + int2 dilation_; + bool use_bias_; + vector original_input_shape_; + Tensor<> weight_; + Tensor<> bias_; Tensor<> grad_weights_; Tensor<> grad_biases_; -}; - + }; } \ No newline at end of file diff --git a/src/modules/layers/conv2d.cpp b/src/modules/layers/conv2d.cpp index 05224ee..6cc7fdf 100644 --- a/src/modules/layers/conv2d.cpp +++ b/src/modules/layers/conv2d.cpp @@ -1,12 +1,147 @@ #include "conv2d.hpp" using namespace nn; -Conv2d::Conv2d(int in_channels, int out_channels, int kernel_size, int stride, int padding, int dilation, bool bias) { - // this->in_channels_ = in_channels; - // this->out_channels_ = out_channels; - // this->kernel_size_ = kernel_size; - // this->stride_ = stride; - // this->padding_ = padding; - // this->dilation_ = dilation; - // this->bias_ = bias; +Conv2d::Conv2d(int64_t in_channels, + int64_t out_channels, + var_pair kernel_size, + var_pair stride, + var_pair padding, + var_pair dilation, + bool bias) +{ + this->in_channels_ = in_channels; + this->out_channels_ = out_channels; + this->use_bias_ = bias; + + // Helper lambda to process variant parameters + auto process_variant = [](auto &&arg) -> int2 + { + using T = std::decay_t; + if constexpr (std::is_same_v) + { + if (arg < 0) + { + throw std::invalid_argument("Negative kernel size, stride, padding, or dilation is not supported"); + } + return {arg, arg}; + } + else + { + static_assert(std::is_same_v>, "Unexpected type in variant"); + if (arg.first < 0 || arg.second < 0) + { + throw std::invalid_argument("Negative kernel size, stride, padding, or dilation is not supported"); + } + return arg; + } + }; + + this->kernel_size_ = std::visit(process_variant, kernel_size); + this->stride_ = std::visit(process_variant, stride); + this->padding_ = std::visit(process_variant, padding); + this->dilation_ = std::visit(process_variant, dilation); + + vector weight_shape = {(size_t)this->out_channels_, (size_t)this->in_channels_, (size_t)this->kernel_size_.first, (size_t)this->kernel_size_.second}; + + this->weight_ = Tensor<>(weight_shape, 0.0); + + if (this->use_bias_) + { + vector bias_shape = {(size_t)this->out_channels_}; + this->bias_ = Tensor<>(bias_shape, 0.0); + } +} + +Tensor<> Conv2d::forward(const Tensor<> &input) +{ + this->original_input_shape_ = input.shapes(); +} + +Tensor<> Conv2d::backward(const Tensor<> &grad_output) +{ +} + +void Conv2d::update_params(const float lr) +{ +} + +Tensor<> Conv2d::convolution(const int2 &stride, const int2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias) +{ + const vector &input_shape = input.shapes(); + const vector &kernel_shape = kernel.shapes(); + + const size_t B = output_shape[0]; + const size_t C_out = output_shape[1]; + const size_t H_out = output_shape[2]; + const size_t W_out = output_shape[3]; + + const size_t C_in = input_shape[1]; + const size_t H_in = input_shape[2]; + const size_t W_in = input_shape[3]; + + const size_t K_H = kernel_shape[2]; + const size_t K_W = kernel_shape[3]; + + Tensor<> output(output_shape, 0.0); + + /* + The logic behind is that + Let's us first focus on the first 
kernel among all out_channel kernels + + Each input channel of the data is convolved with the same channel of the kernel, and the result is added to the output + Meaning that each input data channel only corresponds to the same channel of the kernel + + For example, the channel 1 of the input data is convolved with the channel 1 of the kernel, but it will not be convolved with the channel 2 of the kernel + + After each input data channel convolving with the same channel of the kernel, element-wise addition is performed among all the convolved result with the first kernel + + Now we get a single output channel + + We repeat this process for all the out_channel channels + + And finally we will get an output with out_channel channels + */ + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C_out; ++c) + { + for (size_t h = 0; h < H_out; ++h) + { + for (size_t w = 0; w < W_out; ++w) + { + size_t h_start = h * stride.first; + size_t w_start = w * stride.second; + + for (size_t ic = 0; ic < C_in; ++ic) + { + for (size_t kh = 0; kh < K_H; ++kh) + { + for (size_t kw = 0; kw < K_W; ++kw) + { + size_t h_in = h_start + kh * dilation.first; + size_t w_in = w_start + kw * dilation.second; + + if (h_in >= 0 && h_in < H_in && w_in >= 0 && w_in < W_in) + { + output[b, c, h, w] += input[b, ic, h_in, w_in] * kernel[c, ic, kh, kw]; + } + } + } + } + + if (use_bias) + { + output[b, c, h, w] += bias[c]; + } + } + } + } + } + + return output; +} + +std::tuple Conv2d::calculate_output_shape(const Tensor<> &input) +{ } From 91162908219fc504f9076cf715fe508b997ed84b Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sun, 9 Feb 2025 17:02:51 +0800 Subject: [PATCH 13/36] feat: add conv2d.cpp --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 270c6af..55db136 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,7 @@ set(SOURCE_FILES src/utils/tensor_utils.cpp src/core/module.cpp src/modules/layers/linear.cpp + src/modules/layers/conv2d.cpp src/modules/layers/dropout.cpp src/modules/layers/conv2d.cpp src/modules/losses/mse.cpp @@ -51,7 +52,7 @@ set(SOURCE_FILES add_library(neuralnet ${SOURCE_FILES}) # Add the executable for the main example -add_executable(main examples/main.cpp) +add_executable(main examples/test_conv2d.cpp) target_link_libraries(main neuralnet) # Only build tests if BUILD_TESTS is ON From 037805ef9cc9601ba06ac7a6a63a05dac0eab3a8 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sun, 9 Feb 2025 17:03:09 +0800 Subject: [PATCH 14/36] feat: add debug file for debugging convenience --- debug.sh | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 debug.sh diff --git a/debug.sh b/debug.sh new file mode 100755 index 0000000..2cf2f4d --- /dev/null +++ b/debug.sh @@ -0,0 +1,4 @@ +cd build/ +cmake -DCMAKE_BUILD_TYPE=Debug ..
+make +lldb main \ No newline at end of file From 3d8151b5deeb3dc4e2ddd9f430c75fa0d7edf920 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sun, 9 Feb 2025 17:03:21 +0800 Subject: [PATCH 15/36] feat: add inference --- examples/main.cpp | 66 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/examples/main.cpp b/examples/main.cpp index 616e667..57a301f 100644 --- a/examples/main.cpp +++ b/examples/main.cpp @@ -6,7 +6,8 @@ #include "utils.hpp" using namespace nn; -int main() { +int main() +{ // Define the hyperparameters @@ -21,7 +22,8 @@ int main() { const string mnist_label_file = "../data/mnist/train-labels.idx1-ubyte"; // load MNIST data - if (!dataset.load_data(mnist_image_file, mnist_label_file)) { + if (!dataset.load_data(mnist_image_file, mnist_label_file)) + { cerr << "Failed to load dataset" << endl; return 1; } @@ -45,13 +47,15 @@ int main() { // // Train the model // Example of iterating through all batches - for (size_t e = 0; e < EPOCH; e++) { + for (size_t e = 0; e < EPOCH; e++) + { cout << "\nEpoch " << e + 1 << ":\n"; - dataset.reset(); // Reset batch counter at the start of each epoch + dataset.reset(); // Reset batch counter at the start of each epoch loss_list.clear(); accuracy_list.clear(); - - for (size_t i = 0; i < dataset.get_num_batches(); i++) { + + for (size_t i = 0; i < dataset.get_num_batches(); i++) + { auto batch = dataset.get_next_batch(); auto [data, labels] = batch.to_tensor(); @@ -59,9 +63,7 @@ int main() { Tensor<> output = model(data); loss = criterion(output, labels); - // cout << "After loss" << endl; acc = metrics::accuracy(output, labels); - // cout << "After acc" << endl; accuracy_list.push_back(acc); loss_list.push_back(loss); @@ -72,7 +74,7 @@ int main() { model.update_params(LR); // print the training stats - print_training_stats_line(i, loss, acc); + print_stats_line(i, loss, acc); } double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size(); @@ -84,5 +86,51 @@ int main() { cout << "------------------------------------" << endl; } + // Inference + + model.eval(); + + const string mnist_image_file_test = "../data/mnist/t10k-images.idx3-ubyte"; + const string mnist_label_file_test = "../data/mnist/t10k-labels.idx1-ubyte"; + + MNIST test_dataset(BATCH_SIZE); + + if (!test_dataset.load_data(mnist_image_file_test, mnist_label_file_test)) + { + cerr << "Failed to load test dataset" << endl; + return 1; + } + + cout << "\n------------------------------------" << endl; + cout << "Testing started..." 
<< endl; + + loss = 0.0; + acc = 0.0; + loss_list.clear(); + accuracy_list.clear(); + + for (size_t i = 0; i < test_dataset.get_num_batches(); i++) + { + auto batch = test_dataset.get_next_batch(); + auto [data, labels] = batch.to_tensor(); + + // forward propagation + Tensor<> output = model(data); + + loss = criterion(output, labels); + acc = metrics::accuracy(output, labels); + + accuracy_list.push_back(acc); + loss_list.push_back(loss); + + // print the testing stats + print_stats_line(i, loss, acc); + } + + double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size(); + double total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100; + + cout << "------------------------------------" << endl; + return 0; } From a6af317b12eff0c791d934221d6315d3054d1056 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sun, 9 Feb 2025 17:03:40 +0800 Subject: [PATCH 16/36] feat: add conv2d playground --- examples/test_conv2d.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 examples/test_conv2d.cpp diff --git a/examples/test_conv2d.cpp b/examples/test_conv2d.cpp new file mode 100644 index 0000000..bb8de4a --- /dev/null +++ b/examples/test_conv2d.cpp @@ -0,0 +1,9 @@ +#include "conv2d.hpp" +using namespace nn; + +int main() +{ + Conv2d conv2d(1, 1, 3); + cout << "Conv2d layer initialized with in_channels = 1 and out_channels = 1" << endl; + return 0; +} \ No newline at end of file From d8753f67d2cc094a5176917c7a232b5d849659b3 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sun, 9 Feb 2025 17:03:57 +0800 Subject: [PATCH 17/36] fix: refractor and fix reduce impl --- include/core/tensor.hpp | 1725 ++++++++++++++++++++++----------------- 1 file changed, 968 insertions(+), 757 deletions(-) diff --git a/include/core/tensor.hpp b/include/core/tensor.hpp index 846f3d3..02f21f7 100644 --- a/include/core/tensor.hpp +++ b/include/core/tensor.hpp @@ -1,921 +1,1132 @@ #pragma once -#include -#include #include "tensor_utils.hpp" using namespace std; - template -class Tensor { - private: - shared_ptr> data_ = nullptr; // data is stored as a 1D vector // shared_ptr is used to avoid copying data - vector shapes_; // store the dimensions of the tensor - vector strides_; // store the strides of the tensor - size_t offset_ = 0; // offset for slicing - mutable int64_t size_ = -1; // it can be changed by const member functions (in size() function) - - // Helper function to calculate the index in the 1D vector for a given set of indices expressed in the form of N-D vector - size_t calculate_idx(const vector& idxs) const { - size_t idx = this->offset_; - for (size_t i = 0; i < idxs.size(); ++i) { - idx += idxs[i] * this->strides_[i]; - } - return idx; +class Tensor +{ +private: + shared_ptr> data_ = nullptr; // data is stored as a 1D vector // shared_ptr is used to avoid copying data + vector shapes_; // store the dimensions of the tensor + vector strides_; // store the strides of the tensor + size_t offset_ = 0; // offset for slicing + mutable int64_t size_ = -1; // it can be changed by const member functions (in size() function) + + // Helper function to calculate the index in the 1D vector for a given set of indices expressed in the form of N-D vector + size_t calculate_idx(const vector &idxs) const + { + size_t idx = this->offset_; + for (size_t i = 0; i < idxs.size(); ++i) + { + idx += idxs[i] * this->strides_[i]; + } + return idx; + } + + // Helper function for printing since we don't know the number of dimensions + void 
print_recursive_impl(size_t dim, size_t offset, int indent = 0) const + { + const string indent_str(indent, ' '); + + // Handle empty dimensions + if (this->shapes_[dim] == 0) + { + cout << indent_str << "[]"; + return; } - // Helper function for printing since we don't know the number of dimensions - void print_recursive_impl(size_t dim, size_t offset, int indent = 0) const { - const string indent_str(indent, ' '); + cout << indent_str << "["; - // Handle empty dimensions - if (this->shapes_[dim] == 0) { - cout << indent_str << "[]"; - return; + if (dim == this->ndim() - 1) + { // Last dimension + for (size_t i = 0; i < this->shapes_[dim]; ++i) + { + cout << (*this->data_)[offset + i * this->strides_[dim]]; + if (i < this->shapes_[dim] - 1) + cout << ", "; } - - cout << indent_str << "["; - - if (dim == this->ndim() - 1) { // Last dimension - for (size_t i = 0; i < this->shapes_[dim]; ++i) { - cout << (*this->data_)[offset + i * this->strides_[dim]]; - if (i < this->shapes_[dim] - 1) cout << ", "; - } - } else { - cout << "\n"; - for (size_t i = 0; i < this->shapes_[dim]; ++i) { - print_recursive_impl(dim + 1, offset + i * this->strides_[dim], indent + 2); - if (i < this->shapes_[dim] - 1) cout << ",\n"; - } - cout << "\n" << indent_str; + } + else + { + cout << "\n"; + for (size_t i = 0; i < this->shapes_[dim]; ++i) + { + print_recursive_impl(dim + 1, offset + i * this->strides_[dim], indent + 2); + if (i < this->shapes_[dim] - 1) + cout << ",\n"; } - cout << "]"; + cout << "\n" + << indent_str; + } + cout << "]"; + } + + // Helper function for operator[] overloading + template + const vector get_idxs(Indices... indices) const + { + // Convert variadic arguments to vector + vector idxs({static_cast(indices)...}); + vector normalized_idxs; + + // for better performance, reserve the size of the vector + normalized_idxs.reserve(idxs.size()); + + // Check bounds + for (size_t i = 0; i < idxs.size(); ++i) + { + size_t normalized_idx = normalize_index(idxs[i], this->shapes_[i]); + normalized_idxs.push_back(normalized_idx); } - // Helper function for operator[] overloading - template - const vector get_idxs(Indices... indices) const { - // Convert variadic arguments to vector - vector idxs({static_cast(indices)...}); - vector normalized_idxs; + return normalized_idxs; + } + + /** + * Reduces a 1D or 2D tensor along its rows using the specified reduction operation. + * + * @tparam U The data type of the resulting tensor. Defaults to the type of the current tensor. + * @param op The reduction operation to perform. Supported operations are MAX, MIN, ARGMAX, and ARGMIN. + * @return A Tensor of shape (num_rows, 1) containing the reduced values or indices. + * @throws runtime_error if the tensor's number of dimensions is greater than 2. + */ + + template + Tensor reduce_impl(ReduceOp op) const + { + const size_t ndim = this->ndim(); + + if (ndim > 2) + { + throw std::runtime_error("Only 1D and 2D tensors are supported for reduce"); + } - // for better performance, reserve the size of the vector - normalized_idxs.reserve(idxs.size()); + // Determine tensor dimensions + const size_t num_rows = (ndim == 2) ? this->shapes_[0] : 1; + const size_t num_cols = (ndim == 2) ? 
this->shapes_[1] : this->shapes_[0]; - // Check bounds - for (size_t i = 0; i < idxs.size(); ++i) { - size_t normalized_idx = normalize_index(idxs[i], this->shapes_[i]); - normalized_idxs.push_back(normalized_idx); - } + vector result(num_rows); - return normalized_idxs; - } + for (size_t i = 0; i < num_rows; ++i) + { + // Calculate base offset for current row + size_t row_offset = this->offset_; + if (ndim == 2) + { + row_offset += i * this->strides_[0]; + } - /** - * Reduces a 1D or 2D tensor along its rows using the specified reduction operation. - * - * @tparam U The data type of the resulting tensor. Defaults to the type of the current tensor. - * @param op The reduction operation to perform. Supported operations are MAX, MIN, ARGMAX, and ARGMIN. - * @return A Tensor of shape (num_rows, 1) containing the reduced values or indices. - * @throws runtime_error if the tensor's number of dimensions is greater than 2. - */ + T extreme_val = (*this->data_)[row_offset]; + size_t extreme_idx = 0; - template - Tensor reduce_impl(ReduceOp op) const { - if (this->ndim() > 2) { - throw std::runtime_error("Only 1D and 2D tensors are supported for reduce"); - } + // Process elements using stride-aware indexing + for (size_t j = 1; j < num_cols; ++j) + { + size_t elem_offset = row_offset; + if (ndim == 2) + { + elem_offset += j * this->strides_[1]; + } + else + { + elem_offset += j * this->strides_[0]; + } - const size_t num_rows = (this->ndim() == 2)? this->shapes_[0] : 1; - const size_t num_cols = (this->ndim() == 2)? this->shapes_[1] : this->shapes_[0]; - - - // Result will be a tensor of shape (num_rows, 1) - vector result(num_rows); - - for (size_t i = 0; i < num_rows; i++) { - size_t start_idx = i * num_cols; - - // Initialize with first element in row - T extreme_val = (*this->data_)[start_idx]; - size_t extreme_idx = 0; - - // Process remaining elements in the row - for (size_t j = 1; j < num_cols; j++) { - size_t curr_idx = start_idx + j; - bool update = false; - - switch (op) { - case ReduceOp::MAX: - case ReduceOp::ARGMAX: - update = (*this->data_)[curr_idx] > extreme_val; - break; - case ReduceOp::MIN: - case ReduceOp::ARGMIN: - update = (*this->data_)[curr_idx] < extreme_val; - break; - } - - if (update) { - extreme_val = (*this->data_)[curr_idx]; - extreme_idx = j; - } + bool update = false; + switch (op) + { + case ReduceOp::MAX: + case ReduceOp::ARGMAX: + update = (*this->data_)[elem_offset] > extreme_val; + break; + case ReduceOp::MIN: + case ReduceOp::ARGMIN: + update = (*this->data_)[elem_offset] < extreme_val; + break; } - - // Store the result - switch (op) { - case ReduceOp::MAX: - case ReduceOp::MIN: - result[i] = extreme_val; - break; - case ReduceOp::ARGMAX: - case ReduceOp::ARGMIN: - result[i] = extreme_idx; - break; + + if (update) + { + extreme_val = (*this->data_)[elem_offset]; + extreme_idx = j; } } - - return Tensor(result); - } - Tensor arithmetic_operation_impl(ArithmeticOp op, const Tensor& other) const { - if (other.shapes_ != this->shapes_) { - throw runtime_error("Shape mismatch in arithmetic operation"); + switch (op) + { + case ReduceOp::MAX: + case ReduceOp::MIN: + result[i] = static_cast(extreme_val); + break; + case ReduceOp::ARGMAX: + case ReduceOp::ARGMIN: + result[i] = static_cast(extreme_idx); + break; } + } - size_t ndim = this->ndim(); - - Tensor result(this->shapes_, static_cast(0)); - - // Precompute result's contiguous strides for index calculation - const vector& result_strides = result.strides_; - - for (size_t i = 0; i < this->size(); i++) { - auto 
[a_offset, b_offset] = calculate_tensors_offsets(i, ndim, result_strides, other); - - switch (op) { - case ArithmeticOp::ADD: - (*result.data_)[i] = (*this->data_)[a_offset] + (*other.data_)[b_offset]; - break; - case ArithmeticOp::SUB: - (*result.data_)[i] = (*this->data_)[a_offset] - (*other.data_)[b_offset]; - break; - case ArithmeticOp::MUL: - (*result.data_)[i] = (*this->data_)[a_offset] * (*other.data_)[b_offset]; - break; - case ArithmeticOp::DIV: - (*result.data_)[i] = (*this->data_)[a_offset] / (*other.data_)[b_offset]; - break; - } + return Tensor(result); + } + + Tensor arithmetic_operation_impl(ArithmeticOp op, const Tensor &other) const + { + if (other.shapes_ != this->shapes_) + { + throw runtime_error("Shape mismatch in arithmetic operation"); + } + + size_t ndim = this->ndim(); + + Tensor result(this->shapes_, static_cast(0)); + + // Precompute result's contiguous strides for index calculation + const vector &result_strides = result.strides_; + + for (size_t i = 0; i < this->size(); i++) + { + auto [a_offset, b_offset] = calculate_tensors_offsets(i, ndim, result_strides, other); + + switch (op) + { + case ArithmeticOp::ADD: + (*result.data_)[i] = (*this->data_)[a_offset] + (*other.data_)[b_offset]; + break; + case ArithmeticOp::SUB: + (*result.data_)[i] = (*this->data_)[a_offset] - (*other.data_)[b_offset]; + break; + case ArithmeticOp::MUL: + (*result.data_)[i] = (*this->data_)[a_offset] * (*other.data_)[b_offset]; + break; + case ArithmeticOp::DIV: + (*result.data_)[i] = (*this->data_)[a_offset] / (*other.data_)[b_offset]; + break; } - return result; } + return result; + } - // Helper function to cacluate the stride of the tensor - void calculate_strides() { - this->strides_.resize(this->ndim(), 0); - vector strides(this->ndim()); + // Helper function to cacluate the stride of the tensor + void calculate_strides() + { + this->strides_.resize(this->ndim(), 0); + vector strides(this->ndim()); - int64_t stride = 1; + int64_t stride = 1; - for (int64_t i = this->ndim() - 1; i >= 0; --i) { - this->strides_[i] = stride; - stride *= this->shapes_[i]; - } + for (int64_t i = this->ndim() - 1; i >= 0; --i) + { + this->strides_[i] = stride; + stride *= this->shapes_[i]; } + } - std::tuple calculate_tensors_offsets(const size_t idx, const size_t ndim, const vector& result_strides, const Tensor& other) const { - vector indices(ndim); + std::tuple calculate_tensors_offsets(const size_t idx, const size_t ndim, const vector &result_strides, const Tensor &other) const + { + vector indices(ndim); - size_t remaining = idx; + size_t remaining = idx; - for (int dim = 0; dim < ndim; ++dim) { - indices[dim] = remaining / result_strides[dim]; - remaining %= result_strides[dim]; - } + for (int dim = 0; dim < ndim; ++dim) + { + indices[dim] = remaining / result_strides[dim]; + remaining %= result_strides[dim]; + } - // Calculate offsets using original tensors' strides - size_t a_offset = this->offset_; - size_t b_offset = other.offset_; + // Calculate offsets using original tensors' strides + size_t a_offset = this->offset_; + size_t b_offset = other.offset_; - for (int dim = 0; dim < ndim; ++dim) { - a_offset += indices[dim] * this->strides_[dim]; - b_offset += indices[dim] * other.strides_[dim]; - } + for (int dim = 0; dim < ndim; ++dim) + { + a_offset += indices[dim] * this->strides_[dim]; + b_offset += indices[dim] * other.strides_[dim]; + } - return {a_offset, b_offset}; - } - - // Declare friendship so that TensorView can access private members of Tensor - template - friend Tensor 
dtype_impl(const Tensor& tensor); - - public: - Tensor() = default; - - // Helper to recursively flatten nested vectors and compute shapes - template - void flatten_vector(const std::vector& vec, size_t depth = 0) { - // Add current level's size to shapes - if (depth == this->shapes_.size()) { - // First encounter with this depth: record size - this->shapes_.push_back(vec.size()); - - } - else { - // Verify size matches the existing dimension - if (vec.size() != this->shapes_[depth]) { - throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); - } + return {a_offset, b_offset}; + } + + // Declare friendship so that TensorView can access private members of Tensor + template + friend Tensor dtype_impl(const Tensor &tensor); + +public: + Tensor() = default; + + // Helper to recursively flatten nested vectors and compute shapes + template + void flatten_vector(const std::vector &vec, size_t depth = 0) + { + // Add current level's size to shapes + if (depth == this->shapes_.size()) + { + // First encounter with this depth: record size + this->shapes_.push_back(vec.size()); + } + else + { + // Verify size matches the existing dimension + if (vec.size() != this->shapes_[depth]) + { + throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); } + } - if constexpr (is_vector::value) { - // Ensure nested vectors have consistent sizes at this level - if (!vec.empty()) { - size_t expected_size = vec[0].size(); - for (const auto& elem : vec) { - if (elem.size() != expected_size) { - throw std::invalid_argument("Inconsistent shape in nested vectors"); - } + if constexpr (is_vector::value) + { + // Ensure nested vectors have consistent sizes at this level + if (!vec.empty()) + { + size_t expected_size = vec[0].size(); + for (const auto &elem : vec) + { + if (elem.size() != expected_size) + { + throw std::invalid_argument("Inconsistent shape in nested vectors"); } } + } - // Recurse into nested vectors - for (const auto& elem : vec) { - flatten_vector(elem, depth + 1); - } - } else { - // Ensure leaf elements match the Tensor's data type - // static_assert(std::is_same_v, "Element type must match Tensor type"); - this->data_->reserve(this->data_->size() + vec.size()); - for (const auto& elem : vec) { - this->data_->emplace_back(static_cast(elem)); - } + // Recurse into nested vectors + for (const auto &elem : vec) + { + flatten_vector(elem, depth + 1); } } - - // Constructor for nested vectors - template - Tensor(const std::vector& input) { - this->data_ = make_shared>(); - flatten_vector(input); - this->calculate_strides(); - } - - // // Recursive helper to process nested initializer lists - // template - // void flatten_list(const std::initializer_list& list, size_t depth = 0) { - // // Handle the current dimension - // if (depth == shapes_.size()) { - // // First encounter with this depth: record size - // shapes_.push_back(list.size()); - // } else { - // // Verify size matches the existing dimension - // if (list.size() != shapes_[depth]) { - // throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); - // } - // } - - // // Recurse or add data - // if constexpr (is_list::value) { - // // Process nested lists - // for (const auto& elem : list) { - // flatten_list(elem, depth + 1); - // } - // } else { - // // Ensure element type matches Tensor type - // // static_assert(std::is_same_v, "Element type must match Tensor type"); - // for (const auto& elem : list) { - // data_.push_back(static_cast(elem)); - // } - // } - // 
} - - - // Scaler constructor - Tensor(const T& value) { - this->shapes_ = vector {1}; - this->data_ = make_shared>(1, value); - this->calculate_strides(); + else + { + // Ensure leaf elements match the Tensor's data type + // static_assert(std::is_same_v, "Element type must match Tensor type"); + this->data_->reserve(this->data_->size() + vec.size()); + for (const auto &elem : vec) + { + this->data_->emplace_back(static_cast(elem)); + } } - - // 1D tensor constructor - Tensor(const initializer_list& data_1d) { - this->data_ = make_shared>(data_1d.begin(), data_1d.end()); - this->shapes_ = vector { data_1d.size() }; - this->calculate_strides(); + } + + // Constructor for nested vectors + template + Tensor(const std::vector &input) + { + this->data_ = make_shared>(); + flatten_vector(input); + this->calculate_strides(); + } + + // // Recursive helper to process nested initializer lists + // template + // void flatten_list(const std::initializer_list& list, size_t depth = 0) { + // // Handle the current dimension + // if (depth == shapes_.size()) { + // // First encounter with this depth: record size + // shapes_.push_back(list.size()); + // } else { + // // Verify size matches the existing dimension + // if (list.size() != shapes_[depth]) { + // throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); + // } + // } + + // // Recurse or add data + // if constexpr (is_list::value) { + // // Process nested lists + // for (const auto& elem : list) { + // flatten_list(elem, depth + 1); + // } + // } else { + // // Ensure element type matches Tensor type + // // static_assert(std::is_same_v, "Element type must match Tensor type"); + // for (const auto& elem : list) { + // data_.push_back(static_cast(elem)); + // } + // } + // } + + // Scaler constructor + Tensor(const T &value) + { + this->shapes_ = vector{1}; + this->data_ = make_shared>(1, value); + this->calculate_strides(); + } + + // 1D tensor constructor + Tensor(const initializer_list &data_1d) + { + this->data_ = make_shared>(data_1d.begin(), data_1d.end()); + this->shapes_ = vector{data_1d.size()}; + this->calculate_strides(); + } + + // 2D tensor constructor + Tensor(const initializer_list> &data_2d) + { + const size_t n = data_2d.size(), m = data_2d.begin()->size(); + + this->shapes_ = vector{n, m}; + + this->data_ = make_shared>(); + this->data_->reserve(n * m); // Optimize memory allocation + + for (const initializer_list &row : data_2d) + { + this->data_->insert(this->data_->end(), row.begin(), row.end()); } + this->calculate_strides(); + } - // 2D tensor constructor - Tensor(const initializer_list>& data_2d) { - const size_t n = data_2d.size(), m = data_2d.begin()->size(); + // 3D tensor constructor + Tensor(const initializer_list>> &data_3d) + { + const size_t n = data_3d.size(), m = data_3d.begin()->size(), l = data_3d.begin()->begin()->size(); - this->shapes_ = vector { n, m }; + this->shapes_ = vector{n, m, l}; - this->data_ = make_shared>(); - this->data_->reserve(n * m); // Optimize memory allocation + this->data_ = make_shared>(); + this->data_->reserve(n * m * l); // Optimize memory allocation - for (const initializer_list& row : data_2d) { + for (const initializer_list> &matrix : data_3d) + { + for (const initializer_list &row : matrix) + { this->data_->insert(this->data_->end(), row.begin(), row.end()); } - this->calculate_strides(); } + this->calculate_strides(); + } - // 3D tensor constructor - Tensor(const initializer_list>>& data_3d) { - const size_t n = data_3d.size(), m = 
data_3d.begin()->size(), l = data_3d.begin()->begin()->size(); + // 4D tensor constructor + Tensor(const initializer_list>>> &data_4d) + { + const size_t n = data_4d.size(), m = data_4d.begin()->size(), l = data_4d.begin()->begin()->size(), k = data_4d.begin()->begin()->begin()->size(); - this->shapes_ = vector { n, m, l }; + this->shapes_ = vector{n, m, l, k}; - this->data_ = make_shared>(); - this->data_->reserve(n * m * l); // Optimize memory allocation + this->data_ = make_shared>(); + this->data_->reserve(n * m * l * k); // Optimize memory allocation - for (const initializer_list>& matrix : data_3d) { - for (const initializer_list& row : matrix) { + for (const initializer_list>> &tensor : data_4d) + { + for (const initializer_list> &matrix : tensor) + { + for (const initializer_list &row : matrix) + { this->data_->insert(this->data_->end(), row.begin(), row.end()); } } - this->calculate_strides(); } - - // 4D tensor constructor - Tensor(const initializer_list>>>& data_4d) { - const size_t n = data_4d.size(), m = data_4d.begin()->size(), l = data_4d.begin()->begin()->size(), k = data_4d.begin()->begin()->begin()->size(); - - this->shapes_ = vector { n, m, l, k }; - - this->data_ = make_shared>(); - this->data_->reserve(n * m * l * k); // Optimize memory allocation - - for (const initializer_list>>& tensor : data_4d) { - for (const initializer_list>& matrix : tensor) { - for (const initializer_list& row : matrix) { - this->data_->insert(this->data_->end(), row.begin(), row.end()); - } - } - } - this->calculate_strides(); + this->calculate_strides(); + } + + // certin value constructor + Tensor(const vector &shape, const T &value) + { + this->shapes_ = shape; + size_t size = 1; + for (const size_t &dim : shape) + { + size *= dim; } - // certin value constructor - explicit Tensor(const vector& shape, const T& value) { - this->shapes_ = shape; - size_t size = 1; - for (const size_t& dim : shape) { - size *= dim; - } - - this->data_ = make_shared>(size, value); - this->calculate_strides(); + this->data_ = make_shared>(size, value); + this->calculate_strides(); + } + + // copy constructor + Tensor(const Tensor &other) + { + // already overload the = operator + *this = other; + } + + // template + // Tensor(const Tensor &other) + // { + // // use dtype function to convert the data type + // *this = other.dtype<>(); + // } + + // Add two tensors with same shape, element-wise + inline Tensor add(const Tensor &other) const + { + return arithmetic_operation_impl(ArithmeticOp::ADD, other); + } + + // Subtract two tensors with same shape, element-wise + inline Tensor sub(const Tensor &other) const + { + return arithmetic_operation_impl(ArithmeticOp::SUB, other); + } + + // Multiply two tensors with same shape, element-wise + inline Tensor mul(const Tensor &other) const + { + return arithmetic_operation_impl(ArithmeticOp::MUL, other); + } + + // Divide two tensors with same shape, element-wise + inline Tensor div(const Tensor &other) const + { + return arithmetic_operation_impl(ArithmeticOp::DIV, other); + } + + // Multiply all elements of tensor with the given scaler + Tensor mul(const T &scaler) const + { + Tensor result(this->shapes_, static_cast(0)); + + for (size_t i = 0; i < this->size(); i++) + { + (*result.data_)[i] = (*this->data_)[i] * scaler; } - - // copy constructor - Tensor(const Tensor& other) { - *this = other; + return result; + } + + /** + * Matrix multiplication of two tensors. + * + * The two tensors must have at least two dimensions. 
The leading dimensions (all except last two) must be equal. + * The last two dimensions must match the matrix multiplication dimensions. + * For example, if the first tensor has shape [a, b, n, m] and the second tensor has shape [a, b, m, p], the result will have shape [a, b, n, p]. + * + * The result is a tensor with the leading dimensions of the first tensor and the matrix multiplication result as the last two dimensions. + * + * The total number of batches is the product of the leading dimensions. + * + * The matrix multiplication is performed batched, i.e., for each batch, a matrix multiplication is performed. + * + * @param other The tensor to multiply with. + * @return The result of the matrix multiplication. + */ + Tensor matmul(const Tensor &other) const + { + // Ensure both tensors have at least 2 dimensions + size_t A_ndim = this->ndim(), B_ndim = other.ndim(); + + if (A_ndim < 2 || B_ndim < 2) + { + throw std::runtime_error("Tensors must have at least 2 dimensions for matrix multiplication"); } - // Add two tensors with same shape, element-wise - inline Tensor add(const Tensor& other) const { - return arithmetic_operation_impl(ArithmeticOp::ADD, other); - } + // Check leading dimensions (all except last two) are equal + const size_t A_leading_ndim = A_ndim - 2; + const size_t B_leading_ndim = B_ndim - 2; - // Subtract two tensors with same shape, element-wise - inline Tensor sub(const Tensor& other) const { - return arithmetic_operation_impl(ArithmeticOp::SUB, other); + if (A_leading_ndim != B_leading_ndim) + { + throw std::runtime_error("Number of leading dimensions must match"); } - // Multiply two tensors with same shape, element-wise - inline Tensor mul(const Tensor& other) const { - return arithmetic_operation_impl(ArithmeticOp::MUL, other); - } + vector A_leading_shape(this->shapes_.begin(), this->shapes_.end() - 2); + vector B_leading_shape(other.shapes_.begin(), other.shapes_.end() - 2); - // Divide two tensors with same shape, element-wise - inline Tensor div(const Tensor& other) const { - return arithmetic_operation_impl(ArithmeticOp::DIV, other); + if (A_leading_shape != B_leading_shape) + { + throw invalid_argument("Batch dimensions must match"); } - // Multiply all elements of tensor with the given scaler - Tensor mul(const T& scaler) const { - Tensor result(this->shapes_, static_cast(0)); + // Extract matrix dimensions + const size_t n = this->shapes_[A_ndim - 2]; + const size_t m = this->shapes_[A_ndim - 1]; + const size_t m_other = other.shapes_[B_ndim - 2]; + const size_t p = other.shapes_[B_ndim - 1]; - for (size_t i = 0; i < this->size(); i++) { - (*result.data_)[i] = (*this->data_)[i] * scaler; - } - return result; + if (m != m_other) + { + throw std::invalid_argument("Matrix dimension mismatch: last dimension of first tensor must match second last of second tensor"); } - - /** - * Matrix multiplication of two tensors. - * - * The two tensors must have at least two dimensions. The leading dimensions (all except last two) must be equal. - * The last two dimensions must match the matrix multiplication dimensions. - * For example, if the first tensor has shape [a, b, n, m] and the second tensor has shape [a, b, m, p], the result will have shape [a, b, n, p]. - * - * The result is a tensor with the leading dimensions of the first tensor and the matrix multiplication result as the last two dimensions. - * - * The total number of batches is the product of the leading dimensions. 
- * - * The matrix multiplication is performed batched, i.e., for each batch, a matrix multiplication is performed. - * - * @param other The tensor to multiply with. - * @return The result of the matrix multiplication. - */ - Tensor matmul(const Tensor& other) const { - // Ensure both tensors have at least 2 dimensions - size_t A_ndim = this->ndim(), B_ndim = other.ndim(); - - if (A_ndim < 2 || B_ndim < 2) { - throw std::runtime_error("Tensors must have at least 2 dimensions for matrix multiplication"); - } - - // Check leading dimensions (all except last two) are equal - const size_t A_leading_ndim = A_ndim - 2; - const size_t B_leading_ndim = B_ndim - 2; - - if (A_leading_ndim != B_leading_ndim) { - throw std::runtime_error("Number of leading dimensions must match"); - } + // Determine result shape: leading dimensions + [n, p] + vector result_shapes = A_leading_shape; + result_shapes.push_back(n); + result_shapes.push_back(p); - vector A_leading_shape(this->shapes_.begin(), this->shapes_.end() - 2); - vector B_leading_shape(other.shapes_.begin(), other.shapes_.end() - 2); - - if (A_leading_shape != B_leading_shape) { - throw invalid_argument("Batch dimensions must match"); - } + Tensor result(result_shapes, static_cast(0)); - // Extract matrix dimensions - const size_t n = this->shapes_[A_ndim - 2]; - const size_t m = this->shapes_[A_ndim - 1]; - const size_t m_other = other.shapes_[B_ndim - 2]; - const size_t p = other.shapes_[B_ndim - 1]; + // Compute total number of batches (product of leading dimensions) + // may be we can use divisoin in stride to have O(1) time + size_t total_batches = 1; + for (const size_t &dim : A_leading_shape) + { + total_batches *= dim; + } - if (m != m_other) { - throw std::invalid_argument("Matrix dimension mismatch: last dimension of first tensor must match second last of second tensor"); + for (size_t batch = 0; batch < total_batches; ++batch) + { + // Get multi_dimensional indices for this batch + vector indices = linear_to_multi_idxs(batch, A_leading_shape); + + // Compute offsets for A, B, and result + size_t A_offset = this->offset_; + size_t B_offset = other.offset_; + size_t result_offset = 0; + + for (size_t i = 0; i < A_leading_ndim; ++i) + { + A_offset += indices[i] * this->strides_[i]; + B_offset += indices[i] * other.strides_[i]; + result_offset += indices[i] * result.strides_[i]; } - // Determine result shape: leading dimensions + [n, p] - vector result_shapes = A_leading_shape; - result_shapes.push_back(n); - result_shapes.push_back(p); - - Tensor result(result_shapes, static_cast(0)); + for (size_t i = 0; i < n; ++i) + { + for (size_t j = 0; j < p; ++j) + { + T sum = static_cast(0); - // Compute total number of batches (product of leading dimensions) - // may be we can use divisoin in stride to have O(1) time - size_t total_batches = 1; - for (const size_t& dim: A_leading_shape) { - total_batches *= dim; - } + for (size_t k = 0; k < m; ++k) + { + // Calculate offsets in A and B + size_t a_idx = A_offset + + i * this->strides_[A_leading_ndim] + + k * this->strides_[A_leading_ndim + 1]; - for (size_t batch = 0; batch < total_batches; ++batch) { - // Get multi_dimensional indices for this batch - vector indices = linear_to_multi_idxs(batch, A_leading_shape); + size_t b_idx = B_offset + + k * other.strides_[B_leading_ndim] + + j * other.strides_[B_leading_ndim + 1]; - // Compute offsets for A, B, and result - size_t A_offset = this->offset_; - size_t B_offset = other.offset_; - size_t result_offset = 0; - - for (size_t i = 0; i < A_leading_ndim; 
++i) { - A_offset += indices[i] * this->strides_[i]; - B_offset += indices[i] * other.strides_[i]; - result_offset += indices[i] * result.strides_[i]; - } - - for (size_t i = 0; i < n; ++i) { - for (size_t j = 0; j < p; ++j) { - T sum = static_cast(0); - - for (size_t k = 0; k < m; ++k) { - // Calculate offsets in A and B - size_t a_idx = A_offset + - i * this->strides_[A_leading_ndim] + - k * this->strides_[A_leading_ndim + 1]; - - size_t b_idx = B_offset + - k * other.strides_[B_leading_ndim] + - j * other.strides_[B_leading_ndim + 1]; - - sum += (*this->data_)[a_idx] * (*other.data_)[b_idx]; - } - // Write to result - size_t out_idx = result_offset + - i * result.strides_[result.ndim() - 2] + - j * result.strides_.back(); - (*result.data_)[out_idx] = sum; + sum += (*this->data_)[a_idx] * (*other.data_)[b_idx]; } + // Write to result + size_t out_idx = result_offset + + i * result.strides_[result.ndim() - 2] + + j * result.strides_.back(); + (*result.data_)[out_idx] = sum; } } - - return result; } - - /// @brief Transpose the tensor. - /// @details This function supports transposing 1D and 2D tensors. - /// 1D tensors are transposed from shape (1, n) to (n, 1). - /// For 2D tensors, it swaps rows and columns. - /// @return A new tensor that is the transpose of the original tensor. - /// @throws runtime_error if the tensor has more than 2 dimensions. - - Tensor transpose(int64_t dim0=-2, int64_t dim1=-1) const { - const size_t ndim = this->ndim(); - - if (ndim == 1 && dim0 == -2 && dim1 == -1) { - Tensor result = *this; - result.reshape({this->size(), 1}); - return result; - } - - if (dim0 == dim1) { - return *this; // No-op if dimensions are the same - } - - if (dim0 < 0) { - dim0 += ndim; - } - - if (dim1 < 0) { - dim1 += ndim; - } - - if (dim0 < 0 || dim0 >= ndim || dim1 < 0 || dim1 >= ndim) { - throw out_of_range("Transpose dimensions out of range"); - } + return result; + } - // Create new tensor with swapped dimensions - Tensor result = *this; - swap(result.shapes_[dim0], result.shapes_[dim1]); - swap(result.strides_[dim0], result.strides_[dim1]); + /// @brief Transpose the tensor. + /// @details This function supports transposing 1D and 2D tensors. + /// 1D tensors are transposed from shape (1, n) to (n, 1). + /// For 2D tensors, it swaps rows and columns. + /// @return A new tensor that is the transpose of the original tensor. + /// @throws runtime_error if the tensor has more than 2 dimensions. - // cout << "result.shapes_: " << result.shapes_[0] << " " << result.shapes_[1] << endl; - // cout << "result.strides_: " << result.strides_[0] << " " << result.strides_[1] << endl; + Tensor transpose(int64_t dim0 = -2, int64_t dim1 = -1) const + { + const size_t ndim = this->ndim(); + if (ndim == 1 && dim0 == -2 && dim1 == -1) + { + Tensor result = *this; + result.reshape({this->size(), 1}); return result; } - /// @brief Flatten the tensor into 1D in-place. - /// @details This function only changes the shape of the tensor, and does not modify the underlying data. - /// @post The shape of the tensor is changed to 1D, with the same elements as the original tensor. 
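A quick usage sketch of the batched matmul contract above (illustrative; it assumes the fill-value constructor from this header and that Tensor<> defaults to double, as used elsewhere in the repo):

    Tensor<> A(vector<size_t>{2, 3, 4, 5}, 1.0);   // leading dims [2, 3], 4x5 matrices
    Tensor<> B(vector<size_t>{2, 3, 5, 6}, 2.0);   // leading dims [2, 3], 5x6 matrices
    Tensor<> C = A.matmul(B);                      // result shape [2, 3, 4, 6]
    // every entry of C is 5 * (1.0 * 2.0) = 10.0, one product term per k over the shared dim m = 5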
- void flatten() { - this->shapes_ = { this->size() }; - this->calculate_strides(); - return; + if (dim0 == dim1) + { + return *this; // No-op if dimensions are the same } - /// @brief Calculate the absolute value of each element in the tensor - /// @return a new tensor with the same shape as the original, but with each element replaced by its absolute value - Tensor abs() const { - Tensor result(this->shapes_, static_cast(0)); - - for (size_t i = 0; i < this->size(); i++) { - (*result.data_)[i] = std::abs((*this->data_)[i]); - } - - return result; + if (dim0 < 0) + { + dim0 += ndim; } - /// @brief Filter the tensor with the given function - /// @param func a function to test each element of the tensor. It should return true if the element passes the test - /// @return a new tensor with the same shape as the original, but all elements that fail the test are set to 0. - Tensor filter(bool (*func)(T)) const { - Tensor result(this->shapes_, static_cast(0)); - - for (size_t i = 0; i < this->size(); i++) { - if (func((*this->data_)[i])) { - (*result.data_)[i] = (*this->data_)[i]; - } - } + if (dim1 < 0) + { + dim1 += ndim; + } - return result; + if (dim0 < 0 || dim0 >= ndim || dim1 < 0 || dim1 >= ndim) + { + throw out_of_range("Transpose dimensions out of range"); } - /// @brief Perform element-wise transformation with a function - /// @param func a function to perform element-wise transformation to the tensor - /// @return a new tensor with the same shape as the original, but with each element transformed by the given func - Tensor map(T (*func)(T)) const { - Tensor result(this->shapes_, static_cast(0)); + // Create new tensor with swapped dimensions + Tensor result = *this; + swap(result.shapes_[dim0], result.shapes_[dim1]); + swap(result.strides_[dim0], result.strides_[dim1]); - for (size_t i = 0; i < this->size(); i++) { - (*result.data_)[i] = func((*this->data_)[i]); - } + // cout << "result.shapes_: " << result.shapes_[0] << " " << result.shapes_[1] << endl; + // cout << "result.strides_: " << result.strides_[0] << " " << result.strides_[1] << endl; - return result; - } + return result; + } - /// @brief Calculate the sum of all elements in the tensor - /// @return The sum of all elements in the tensor, regardless of the dimension - T sum() const { - T sum = static_cast(0); + Tensor permute(const initializer_list &dims) + { + size_t ndim = this->ndim(); - for (size_t i = 0; i < this->size(); i++) { - sum += (*this->data_)[i]; - } - - return sum; + if (dims.size() != ndim) + { + throw std::invalid_argument("Number of dimensions in permutation must match tensor's number of dimensions"); } - /// @brief Check if all elements of two tensors are equal - /// @param other Tensor to compare - /// @return Tensor of integers where each element is 1 if the two tensors are equal at the same index, 0 otherwise - Tensor equal(const Tensor& other) const{ - if (other.shapes_ != this->shapes_) { - throw runtime_error("Shape mismatch"); + unordered_set seen_dims; + for (int64_t dim : dims) + { + if (dim < 0 || dim >= ndim) + { + throw out_of_range("Permute dimension out of range"); } - - Tensor result(this->shapes_, static_cast(0)); - const vector& result_strides = result.strides_; - - for (size_t i = 0; i < this->size(); i++) { - auto [a_offset, b_offset] = calculate_tensors_offsets(i, this->ndim(), result_strides, other); - - (*result.data_)[i] = (*this->data_)[a_offset] == (*other.data_)[b_offset]; + if (seen_dims.count(dim)) + { + throw invalid_argument("Duplicate dimension in permute"); } - - return 
result.dtype(); + seen_dims.insert(dim); } - /// @brief Check if all elements of two tensors are equal - /// @param other Tensor to compare - /// @return true if all elements are equal, false otherwise - bool compare(const Tensor& other) const { - if (other.shapes_ != this->shapes_) { - throw runtime_error("Shape mismatch"); - } - - for (size_t i = 0; i < this->size(); i++) { - auto [a_offset, b_offset] = calculate_tensors_offsets(i, this->ndim(), this->strides_, other); + vector new_shapes(ndim); + vector new_strides(ndim); - if ((*this->data_)[a_offset] != (*other.data_)[b_offset]) { - return false; - } + int64_t i = 0; + for (int64_t dim : dims) + { + if (dim >= ndim) + { + throw std::out_of_range("Permutation dimension out of range"); } - return true; - } - /// @brief Reduce the tensor to the maximum value of all elements - /// @return a tensor with a single element, the maximum of all elements in the tensor - inline Tensor<> max() const { - return reduce_impl(ReduceOp::MAX); + new_shapes[i] = this->shapes_[dim]; + new_strides[i] = this->strides_[dim]; + ++i; } - - /// @brief Reduce the tensor to the indices of the maximum values along each row - /// @return a tensor with indices of the maximum values for each row - inline Tensor argmax() const { - return reduce_impl(ReduceOp::ARGMAX); + Tensor result = *this; + result.shapes_ = new_shapes; + result.strides_ = new_strides; + + return result; + } + + /// @brief Flatten the tensor into 1D in-place. + /// @details This function only changes the shape of the tensor, and does not modify the underlying data. + /// @post The shape of the tensor is changed to 1D, with the same elements as the original tensor. + void flatten() + { + this->shapes_ = {this->size()}; + this->calculate_strides(); + return; + } + + /// @brief Calculate the absolute value of each element in the tensor + /// @return a new tensor with the same shape as the original, but with each element replaced by its absolute value + Tensor abs() const + { + Tensor result = *this; + + for (size_t i = 0; i < this->size(); i++) + { + (*result.data_)[i] = std::abs((*this->data_)[i]); } - /// @brief Reduce the tensor to the minimum value of all elements - /// @return a tensor with a single element, the minimum of all elements in the tensor - inline Tensor<> min() const { - return reduce_impl(ReduceOp::MIN); + return result; + } + + /// @brief Filter the tensor with the given function + /// @param func a function to test each element of the tensor. It should return true if the element passes the test + /// @return a new tensor with the same shape as the original, but all elements that fail the test are set to 0. + Tensor filter(bool (*func)(T)) const + { + Tensor result = *this; + + for (size_t i = 0; i < this->size(); i++) + { + if (func((*this->data_)[i])) + { + (*result.data_)[i] = (*this->data_)[i]; + } } - /// @brief Reduce the tensor to the indices of the minimum values along each row - /// @return a tensor with indices of the minimum values for each row - inline Tensor argmin() const { - return reduce_impl(ReduceOp::ARGMIN); - } + return result; + } - - /// @brief Convert the tensor to a tensor of a different type. - /// @details If U is not provided, it defaults to double. 
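Because transpose() and permute() above only swap shape/stride metadata and leave the flat element order untouched, a swapped index resolves back to the original element. A small sketch (illustrative, assuming the 2-D initializer-list constructor defined earlier in this header):

    Tensor<> t = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}};   // shape [2, 3], row-major strides [3, 1]
    Tensor<> tt = t.transpose();                       // shape [3, 2], strides [1, 3]
    // tt[2, 1] maps to flat offset 2*1 + 1*3 = 5, the same element as t[1, 2] == 6.0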
- /// @param U the type to convert to - /// @return a tensor with the same shape and data, but with the type U - template - Tensor dtype() const { - return dtype_impl(*this); + /// @brief Perform element-wise transformation with a function + /// @param func a function to perform element-wise transformation to the tensor + /// @return a new tensor with the same shape as the original, but with each element transformed by the given func + Tensor map(T (*func)(T)) const + { + Tensor result = *this; + + for (size_t i = 0; i < this->size(); i++) + { + (*result.data_)[i] = func((*this->data_)[i]); } - /// @brief Reshape the tensor to the specified new shape. - /// @details This function changes the shape of the tensor without altering the data. - /// The total number of elements must remain the same; otherwise, an exception is thrown. - /// @param new_shape The desired shape for the tensor. - /// @throws runtime_error if the new shape is not compatible with the current number of elements. - void reshape(const vector& new_shape) { - size_t new_size = 1; - for (const size_t& dim : new_shape) { - new_size *= dim; - } + return result; + } - if (new_size != this->size()) { - throw runtime_error("New shape must be compatible with the original shape"); - } + /// @brief Calculate the sum of all elements in the tensor + /// @return The sum of all elements in the tensor, regardless of the dimension + T sum() const + { + T sum = static_cast(0); - this->shapes_ = new_shape; - this->calculate_strides(); + for (size_t i = 0; i < this->size(); i++) + { + sum += (*this->data_)[i]; + } - return; + return sum; + } + + /// @brief Check if all elements of two tensors are equal + /// @param other Tensor to compare + /// @return Tensor of integers where each element is 1 if the two tensors are equal at the same index, 0 otherwise + Tensor equal(const Tensor &other) const + { + if (other.shapes_ != this->shapes_) + { + throw runtime_error("Shape mismatch"); } - /// @brief Return a deep copy of the tensor. Actually the same as the copy constructor. - /// @details This function will create a new tensor with the same shape and data as the current tensor. 
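One detail worth noting for map() and filter() above: both take plain function pointers, so capture-less lambdas convert implicitly but capturing lambdas will not compile. A short sketch (illustrative):

    Tensor<> v(vector<size_t>{4}, 3.0);                           // [3, 3, 3, 3]
    Tensor<> sq = v.map([](double x) { return x * x; });          // [9, 9, 9, 9]
    Tensor<> kept = sq.filter([](double x) { return x > 5.0; });  // elements failing the test are set to 0
    double total = sq.sum();                                      // 36.0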
- /// @return a new tensor which is a deep copy of the current tensor - Tensor clone() const { - Tensor result = *this; + Tensor result(this->shapes_, static_cast(0)); + const vector &result_strides = result.strides_; - return result; + for (size_t i = 0; i < this->size(); i++) + { + auto [a_offset, b_offset] = calculate_tensors_offsets(i, this->ndim(), result_strides, other); + + (*result.data_)[i] = (*this->data_)[a_offset] == (*other.data_)[b_offset]; } - vector to_vector() const { return (*this->data_); } - - // Get the dimension of the tensor - inline size_t ndim() const { - return this->shapes_.size(); + return result.dtype(); + } + + /// @brief Check if all elements of two tensors are equal + /// @param other Tensor to compare + /// @return true if all elements are equal, false otherwise + bool compare(const Tensor &other) const + { + if (other.shapes_ != this->shapes_) + { + throw runtime_error("Shape mismatch"); } - const size_t size() const { - if (this->offset_ == 0) { - return this->data_->size(); - } + for (size_t i = 0; i < this->size(); i++) + { + auto [a_offset, b_offset] = calculate_tensors_offsets(i, this->ndim(), this->strides_, other); - if (this->size_ != -1) { - return this->size_; + if ((*this->data_)[a_offset] != (*other.data_)[b_offset]) + { + return false; } - - this->size_ = 1; - for (const size_t& s : this->shapes_) { - this->size_ *= s; - } - - return this->size_; } - - /// @brief Print the tensor to console. - /// @details This function will print the tensor in a nested array style. - void print() const { - print_recursive_impl(0, 0, 0); - cout << endl; // flush the output - return; + return true; + } + + /// @brief Reduce the tensor to the maximum value of all elements + /// @return a tensor with a single element, the maximum of all elements in the tensor + inline Tensor<> max() const + { + return reduce_impl(ReduceOp::MAX); + } + + /// @brief Reduce the tensor to the indices of the maximum values along each row + /// @return a tensor with indices of the maximum values for each row + inline Tensor argmax() const + { + return reduce_impl(ReduceOp::ARGMAX); + } + + /// @brief Reduce the tensor to the minimum value of all elements + /// @return a tensor with a single element, the minimum of all elements in the tensor + inline Tensor<> min() const + { + return reduce_impl(ReduceOp::MIN); + } + + /// @brief Reduce the tensor to the indices of the minimum values along each row + /// @return a tensor with indices of the minimum values for each row + inline Tensor argmin() const + { + return reduce_impl(ReduceOp::ARGMIN); + } + + /// @brief Convert the tensor to a tensor of a different type. + /// @details If U is not provided, it defaults to double. + /// @param U the type to convert to + /// @return a tensor with the same shape and data, but with the type U + template + Tensor dtype() const + { + return dtype_impl(*this); + } + + /// @brief Reshape the tensor to the specified new shape. + /// @details This function changes the shape of the tensor without altering the data. + /// The total number of elements must remain the same; otherwise, an exception is thrown. + /// @param new_shape The desired shape for the tensor. + /// @throws runtime_error if the new shape is not compatible with the current number of elements. 
+ void reshape(const vector &new_shape) + { + size_t new_size = 1; + for (const size_t &dim : new_shape) + { + new_size *= dim; } - inline const vector& shapes() const { return this->shapes_; } - + if (new_size != this->size()) + { + throw runtime_error("New shape must be compatible with the original shape"); + } - // ========================================operators overloading======================================== - inline Tensor operator+(const Tensor& other) const { return this->add(other); } - inline Tensor operator-(const Tensor& other) const { return this->sub(other); } - inline Tensor operator*(const Tensor& other) const { return this->mul(other); } - inline Tensor operator*(const T& scaler) const { return this->mul(scaler); } - inline bool operator==(const Tensor& other) const { return this->compare(other); } + this->shapes_ = new_shape; + this->calculate_strides(); - Tensor& operator=(const Tensor& other) { - if (this == &other) return *this; + return; + } - this->shapes_ = other.shapes_; - this->data_ = make_shared>(*(other.data_)); - this->calculate_strides(); + /// @brief Return a deep copy of the tensor. Actually the same as the copy constructor. + /// @details This function will create a new tensor with the same shape and data as the current tensor. + /// @return a new tensor which is a deep copy of the current tensor + Tensor clone() const + { + Tensor result; - // Copy data from original tensor's view to the new contiguous storage - for (size_t i = 0; i < this->size(); ++i) { - vector indices = linear_to_multi_idxs(i, this->shapes_); - size_t src_offset = other.offset_; + result.shapes_ = this->shapes_; + result.data_ = make_shared>(*(this->data_)); + result.calculate_strides(); - for (size_t dim = 0; dim < indices.size(); ++dim) { - src_offset += indices[dim] * other.strides_[dim]; - } + // Copy data from original tensor's view to the new contiguous storage + for (size_t i = 0; i < this->size(); ++i) + { + vector indices = linear_to_multi_idxs(i, result.shapes_); + size_t src_offset = this->offset_; - (*this->data_)[i] = (*other.data_)[src_offset]; + for (size_t dim = 0; dim < indices.size(); ++dim) + { + src_offset += indices[dim] * this->strides_[dim]; } - return *this; + (*result.data_)[i] = (*this->data_)[src_offset]; } - const Tensor operator+=(const Tensor& other) { - *this = *this + other; - return *this; + return result; + } + + vector to_vector() const { return (*this->data_); } + + // Get the dimension of the tensor + inline size_t ndim() const + { + return this->shapes_.size(); + } + + const size_t size() const + { + if (this->offset_ == 0) + { + return this->data_->size(); } - const Tensor operator-=(const Tensor& other) { - *this = *this - other; - return *this; + if (this->size_ != -1) + { + return this->size_; } - const Tensor operator*=(const Tensor& other) { - *this = *this * other; - return *this; + this->size_ = 1; + for (const size_t &s : this->shapes_) + { + this->size_ *= s; } - const Tensor operator*=(const T& other) { - *this = *this * other; + return this->size_; + } + + /// @brief Print the tensor to console. + /// @details This function will print the tensor in a nested array style. 
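Before the print() definition that follows, a brief usage sketch of reshape(), flatten() and clone() from this hunk (illustrative; reshape() and flatten() only rewrite shape/stride metadata, while clone() materialises an independent contiguous copy):

    Tensor<> m(vector<size_t>{2, 6}, 1.0);   // 12 elements
    m.reshape({3, 4});                       // fine: 3 * 4 == 12
    m.flatten();                             // shape is now [12]
    Tensor<> c = m.clone();                  // separate storage, same values
    // m.reshape({5, 3});                    // would throw: 15 != 12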
+ void print() const + { + print_recursive_impl(0, 0, 0); + cout << endl; // flush the output + return; + } + + inline const vector &shapes() const { return this->shapes_; } + + // ========================================operators overloading======================================== + inline Tensor operator+(const Tensor &other) const { return this->add(other); } + inline Tensor operator-(const Tensor &other) const { return this->sub(other); } + inline Tensor operator*(const Tensor &other) const { return this->mul(other); } + inline Tensor operator*(const T &scaler) const { return this->mul(scaler); } + inline bool operator==(const Tensor &other) const { return this->compare(other); } + + /* + Instead of returning a new tensor, we modify the current tensor in place. + + Besides, it is slightly different from method clone(), in which it will not modify data_ to make all the elements stored contiguously. + */ + Tensor &operator=(const Tensor &other) + { + if (this == &other) return *this; + + this->shapes_ = other.shapes_; + this->data_ = make_shared>(*(other.data_)); + this->strides_ = other.strides_; + this->offset_ = other.offset_; + this->size_ = other.size_; + + return *this; + } + + const Tensor operator+=(const Tensor &other) + { + *this = *this + other; + return *this; + } + + const Tensor operator-=(const Tensor &other) + { + *this = *this - other; + return *this; + } + + const Tensor operator*=(const Tensor &other) + { + *this = *this * other; + return *this; + } + + const Tensor operator*=(const T &other) + { + *this = *this * other; + return *this; + } + + // lvalue operator overloading + template + T &operator[](Indices... indices) + { + vector idxs = this->get_idxs(indices...); + return (*this->data_)[this->calculate_idx(idxs)]; + } + + // Using vector to index the tensor + T &operator[](const vector &indices) + { + return (*this->data_)[this->calculate_idx(indices)]; + } + + // rvalue operator overloading + template + const T &operator[](Indices... indices) const + { + vector idxs = this->get_idxs(indices...); + return (*this->data_)[this->calculate_idx(idxs)]; + } + + // Using vector to index the tensor + const T &operator[](const vector &indices) const + { + return (*this->data_)[this->calculate_idx(indices)]; + } + + /** + * @brief Advanced indexing using a combination of integers, strings, and slices. + * + * This function allows for flexible indexing into the tensor, similar to Python's + * advanced indexing. It supports integer indices, string-based slices, and the ellipsis + * ("...") for automatic dimension completion. The function expands slices and handles + * ellipsis to generate the appropriate sub-tensor. + * + * @param indices A vector of indices where each index can be an integer, a string + * representing a slice, or a special ellipsis ("..."). + * @return A new tensor that is indexed from the current tensor according to the given indices. + * + * @throw std::invalid_argument if an index type is invalid or if more than one ellipsis is used. 
+ */ + using IndexType = variant; + Tensor index(const vector &indices) const + { + vector> expanded_indices; + + // Handle ellipsis and expand slices + // cout << "Start expanding indices" << endl; + for (size_t i = 0; i < indices.size(); ++i) + { + const auto &idx = indices[i]; + + if (auto str_idx = get_if(&idx)) + { + Slice slice = Slice::parse(*str_idx); + expanded_indices.push_back(apply_slice(slice, this->shapes_[i])); + } + else if (auto int_idx = get_if(&idx)) + { + expanded_indices.push_back({normalize_index(*int_idx, this->shapes_[i])}); + } + else if (auto slice_idx = get_if(&idx)) + { + expanded_indices.push_back(apply_slice(*slice_idx, this->shapes_[i])); + } + else + { + throw std::invalid_argument("Invalid index type"); + } } - // lvalue operator overloading - template - T& operator[](Indices... indices) { - vector idxs = this->get_idxs(indices...); - return (*this->data_)[this->calculate_idx(idxs)]; - } - - T& operator[](const vector& indices) { - return (*this->data_)[this->calculate_idx(indices)]; - } - - // rvalue operator overloading - template - const T& operator[](Indices... indices) const { - vector idxs = this->get_idxs(indices...); - return (*this->data_)[this->calculate_idx(idxs)]; - } - - const T& operator[](const vector& indices) const { - return (*this->data_)[this->calculate_idx(indices)]; - } - - /** - * @brief Advanced indexing using a combination of integers, strings, and slices. - * - * This function allows for flexible indexing into the tensor, similar to Python's - * advanced indexing. It supports integer indices, string-based slices, and the ellipsis - * ("...") for automatic dimension completion. The function expands slices and handles - * ellipsis to generate the appropriate sub-tensor. - * - * @param indices A vector of indices where each index can be an integer, a string - * representing a slice, or a special ellipsis ("..."). - * @return A new tensor that is indexed from the current tensor according to the given indices. - * - * @throw std::invalid_argument if an index type is invalid or if more than one ellipsis is used. 
- */ - using IndexType = variant; - Tensor index(const vector& indices) const { - vector> expanded_indices; - - // Handle ellipsis and expand slices - // cout << "Start expanding indices" << endl; - for (size_t i = 0; i < indices.size(); ++i) { - const auto& idx = indices[i]; - - if (auto str_idx = get_if(&idx)) { - Slice slice = Slice::parse(*str_idx); - expanded_indices.push_back(apply_slice(slice, this->shapes_[i])); - } - else if (auto int_idx = get_if(&idx)) { - expanded_indices.push_back({normalize_index(*int_idx, this->shapes_[i])}); - } - else if (auto slice_idx = get_if(&idx)) { - expanded_indices.push_back(apply_slice(*slice_idx, this->shapes_[i])); - } - else { - throw std::invalid_argument("Invalid index type"); + // Calculate new dimensions + vector new_dims; + for (const vector &expanded_idx : expanded_indices) + { + if (expanded_idx[0] != -1) + { // Not None/newaxis + if (expanded_idx.size() > 1) + { + new_dims.push_back(expanded_idx.size()); } } - - // Calculate new dimensions - vector new_dims; - for (const vector& expanded_idx : expanded_indices) { - if (expanded_idx[0] != -1) { // Not None/newaxis - if (expanded_idx.size() > 1) { - new_dims.push_back(expanded_idx.size()); - } - } - else { - new_dims.push_back(1); - } + else + { + new_dims.push_back(1); } + } - // cout << "Start printing new_dims" << endl; - // cout << "new_dims size: " << new_dims.size() << endl; - // for (size_t i = 0; i < new_dims.size(); ++i) { - // cout << new_dims[i] << " "; - // } - - // Create result tensor - Tensor result(new_dims, static_cast(0)); - - // Fill result tensor - vector current_indices(expanded_indices.size()); - vector result_indices; - - // Recursive lambda to fill result tensor - function fill_tensor = [&](size_t depth) { - if (depth == expanded_indices.size()) { - result_indices.clear(); - for (int i = 0; i < expanded_indices.size(); ++i) { - if (expanded_indices[i][0] != -1 && expanded_indices[i].size() > 1) { - result_indices.push_back(current_indices[i]); - } - } - - vector original_indices; - for (int i = 0; i < expanded_indices.size(); ++i) { - if (expanded_indices[i][0] != -1) { - original_indices.push_back(expanded_indices[i][current_indices[i]]); - } + // cout << "Start printing new_dims" << endl; + // cout << "new_dims size: " << new_dims.size() << endl; + // for (size_t i = 0; i < new_dims.size(); ++i) { + // cout << new_dims[i] << " "; + // } + + // Create result tensor + Tensor result(new_dims, static_cast(0)); + + // Fill result tensor + vector current_indices(expanded_indices.size()); + vector result_indices; + + // Recursive lambda to fill result tensor + function fill_tensor = [&](size_t depth) + { + if (depth == expanded_indices.size()) + { + result_indices.clear(); + for (int i = 0; i < expanded_indices.size(); ++i) + { + if (expanded_indices[i][0] != -1 && expanded_indices[i].size() > 1) + { + result_indices.push_back(current_indices[i]); } - - result[result_indices] = (*this)[original_indices]; - return; } - - for (int i = 0; i < expanded_indices[depth].size(); ++i) { - current_indices[depth] = i; - fill_tensor(depth + 1); + + vector original_indices; + for (int i = 0; i < expanded_indices.size(); ++i) + { + if (expanded_indices[i][0] != -1) + { + original_indices.push_back(expanded_indices[i][current_indices[i]]); + } } - }; - - fill_tensor(0); - return result; - } + + result[result_indices] = (*this)[original_indices]; + return; + } + + for (int i = 0; i < expanded_indices[depth].size(); ++i) + { + current_indices[depth] = i; + fill_tensor(depth + 1); + } + 
}; + + fill_tensor(0); + return result; + } }; \ No newline at end of file From b7e88e28aa6ebfbfa4f29b2df9743882ff4ae38c Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sun, 9 Feb 2025 17:04:51 +0800 Subject: [PATCH 18/36] refractor: formatting --- include/utils/tensor_utils.hpp | 71 +++++++++++++++++----------- include/utils/utils.hpp | 4 +- src/models/mlp.cpp | 33 ++++++++----- src/modules/activations/softmax.cpp | 40 +++++++++++----- src/modules/losses/cross_entropy.cpp | 26 ++++++---- src/utils/utils.cpp | 18 +++---- 6 files changed, 124 insertions(+), 68 deletions(-) diff --git a/include/utils/tensor_utils.hpp b/include/utils/tensor_utils.hpp index 941231c..345dfd8 100644 --- a/include/utils/tensor_utils.hpp +++ b/include/utils/tensor_utils.hpp @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include #include @@ -10,7 +11,8 @@ #include #include #include - +#include +#include using namespace std; @@ -20,11 +22,12 @@ template class Tensor; // Convert tensor to different data type -template -Tensor dtype_impl(const Tensor& tensor); +template +Tensor dtype_impl(const Tensor &tensor); // for max, min ,argmax, argmin reduction -enum class ReduceOp { +enum class ReduceOp +{ MAX, MIN, ARGMAX, @@ -32,7 +35,8 @@ enum class ReduceOp { }; // for add, subtract, multiply, divide -enum class ArithmeticOp { +enum class ArithmeticOp +{ ADD, SUB, MUL, @@ -40,52 +44,65 @@ enum class ArithmeticOp { }; // Slice struct to handle Python-like slicing -struct Slice { +struct Slice +{ int start; int stop; int step; - + Slice(int start_ = 0, int stop_ = -1, int step_ = 1) : start(start_), stop(stop_), step(step_) {} - - static Slice parse(const string& slice_str); + + static Slice parse(const string &slice_str); }; // Helper function to convert negative indices to positive size_t normalize_index(int idx, size_t dim_size); // Helper function to apply slice to a dimension -vector apply_slice(const Slice& slice, size_t dim_size); +vector apply_slice(const Slice &slice, size_t dim_size); -vector linear_to_multi_idxs(size_t idx, const vector& shape); +// Helper function to calculate the offset of the tensor given a single index +vector linear_to_multi_idxs(size_t idx, const vector &shape); // Type trait to check if a type is a std::vector -template -struct is_vector : public std::false_type {}; +template +struct is_vector : public std::false_type +{ +}; -template -struct is_vector> : public std::true_type {}; +template +struct is_vector> : public std::true_type +{ +}; // Type trait to check if a type is a std::vector -template -struct is_initializer_list : public std::false_type {}; +template +struct is_initializer_list : public std::false_type +{ +}; -template -struct is_initializer_list> : public std::true_type {}; +template +struct is_initializer_list> : public std::true_type +{ +}; // ================================================definition================================================ -template -Tensor dtype_impl(const Tensor& tensor) { +template +Tensor dtype_impl(const Tensor &tensor) +{ Tensor result; - result.shapes_ = tensor.shapes_; + result.shapes_ = tensor.shapes_; result.data_ = make_shared>(); result.data_->resize(tensor.data_->size()); - + result.strides_ = tensor.strides_; + result.offset_ = tensor.offset_; + result.size_ = tensor.size_; + std::transform(tensor.data_->begin(), tensor.data_->end(), result.data_->begin(), - [](const U& val) { return static_cast(val); }); - - result.calculate_strides(); - + [](const U &val) + { return static_cast(val); }); + return result; } \ No 
newline at end of file diff --git a/include/utils/utils.hpp b/include/utils/utils.hpp index 644fb92..27bd80d 100644 --- a/include/utils/utils.hpp +++ b/include/utils/utils.hpp @@ -2,5 +2,5 @@ #include using namespace std; -void print_training_stats(int batch, float loss, float accuracy); -void print_training_stats_line(int batch, float loss, float accuracy); \ No newline at end of file +void print_stats(int batch, float loss, float accuracy); +void print_stats_line(int batch, float loss, float accuracy); \ No newline at end of file diff --git a/src/models/mlp.cpp b/src/models/mlp.cpp index 8a5fda4..f1d0700 100644 --- a/src/models/mlp.cpp +++ b/src/models/mlp.cpp @@ -3,12 +3,15 @@ #include "relu.hpp" #include "dropout.hpp" -MLP::MLP(vector layer_sizes, double dropout_p) { +MLP::MLP(vector layer_sizes, double dropout_p) +{ this->num_layers_ = layer_sizes.size(); - for (size_t i = 0; i < this->num_layers_ - 1; i++) { + for (size_t i = 0; i < this->num_layers_ - 1; i++) + { this->layers_.push_back(new Linear(layer_sizes[i], layer_sizes[i + 1], true)); - if (i < this->num_layers_ - 2) { + if (i < this->num_layers_ - 2) + { this->layers_.push_back(new ReLU()); this->layers_.push_back(new Dropout(dropout_p)); } @@ -17,34 +20,42 @@ MLP::MLP(vector layer_sizes, double dropout_p) { MLP::MLP(initializer_list layer_sizes, double dropout_p) : MLP(vector(layer_sizes), dropout_p) {} -MLP::~MLP() { - for (Module* layer : this->layers_) { +MLP::~MLP() +{ + for (Module *layer : this->layers_) + { delete layer; } } -Tensor<> MLP::forward(const Tensor<>& input) { +Tensor<> MLP::forward(const Tensor<> &input) +{ Tensor<> x = input; - for (Module* layer : this->layers_) { + for (Module *layer : this->layers_) + { x = layer->forward(x); } return x; } -Tensor<> MLP::backward(const Tensor<>& grad_output) { +Tensor<> MLP::backward(const Tensor<> &grad_output) +{ Tensor<> grad = grad_output; - for (int i = this->layers_.size() - 1; i >= 0; i--) { + for (int i = this->layers_.size() - 1; i >= 0; i--) + { grad = this->layers_[i]->backward(grad); } return grad; } -void MLP::update_params(const float lr) { - for (Module* layer : this->layers_) { +void MLP::update_params(const float lr) +{ + for (Module *layer : this->layers_) + { layer->update_params(lr); } diff --git a/src/modules/activations/softmax.cpp b/src/modules/activations/softmax.cpp index a7afdca..6fe51c2 100644 --- a/src/modules/activations/softmax.cpp +++ b/src/modules/activations/softmax.cpp @@ -2,26 +2,32 @@ #include "softmax.hpp" using namespace nn; -Softmax::Softmax() { +Softmax::Softmax() +{ cout << "Starting Softmax" << endl; cout << "Softmax initialized" << endl; } -Tensor<> Softmax::softmax_helper(const Tensor<>& input) { - Tensor<> result = input.map([](double x) { return exp(x); }); +Tensor<> Softmax::softmax_helper(const Tensor<> &input) +{ + Tensor<> result = input.map([](double x) + { return exp(x); }); double sum = result.sum(); return result * (1 / sum); } -vector Softmax::softmax_helper(const vector& input) { +vector Softmax::softmax_helper(const vector &input) +{ double sum = 0.0f; vector result; - for (size_t i = 0; i < input.size(); i++) { + for (size_t i = 0; i < input.size(); i++) + { sum += exp(input[i]); } - for (size_t i = 0; i < input.size(); i++) { + for (size_t i = 0; i < input.size(); i++) + { result.push_back(exp(input[i]) / sum); } @@ -29,21 +35,32 @@ vector Softmax::softmax_helper(const vector& input) { } // Only support 1D and 2D Tensors -Tensor<> Softmax::forward(const Tensor<>& input) { +Tensor<> Softmax::forward(const 
Tensor<> &input) +{ // In softmax case, we don't have to store the input as it is not used in the backward pass // Instead, we store the softmax(input) - if (input.ndim() == 1) { + if (input.ndim() == 1) + { return this->softmax_helper(input); } + // const size_t leading_ndim = input.ndim() - 2; + + // vector leading_shape(input.shapes().begin(), input.shapes().end() - 2); + + // const size_t n = input.shapes()[leading_ndim]; + // const size_t m = input.shapes()[leading_ndim + 1]; + vector> softmax_input; - for (size_t i = 0; i < input.shapes()[0]; i++) { + for (size_t i = 0; i < input.shapes()[0]; i++) + { vector input_row; input_row.reserve(input.shapes()[1]); - for (size_t j = 0; j < input.shapes()[1]; j++) { + for (size_t j = 0; j < input.shapes()[1]; j++) + { input_row.push_back(input[i, j]); } softmax_input.push_back(this->softmax_helper(input_row)); @@ -54,7 +71,8 @@ Tensor<> Softmax::forward(const Tensor<>& input) { return this->softmax_input_cache_; } -Tensor<> Softmax::backward(const Tensor<>& grad_output) { +Tensor<> Softmax::backward(const Tensor<> &grad_output) +{ Tensor<> softmax_grad; return softmax_grad; diff --git a/src/modules/losses/cross_entropy.cpp b/src/modules/losses/cross_entropy.cpp index a75f61a..b20ed72 100644 --- a/src/modules/losses/cross_entropy.cpp +++ b/src/modules/losses/cross_entropy.cpp @@ -3,14 +3,16 @@ #include using namespace nn; -CrossEntropyLoss::CrossEntropyLoss() { +CrossEntropyLoss::CrossEntropyLoss() +{ cout << "Starting CrossEntropyLoss" << endl; cout << "CrossEntropyLoss initialized" << endl; } -double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { +double CrossEntropyLoss::forward(const Tensor<> &Y_hat, const Tensor<> &Y) +{ /* - L = 1 / B \sum_{i=1}^B \sum_{j=1}^M Y_{ij} * log(softmax(Y_hat_{ij)}) + L = 1 / B \sum_{i=1}^B \sum_{j=1}^M Y_{ij} * log(softmax(Y_hat_{ij})) R^B x M, Y R^B x M @@ -19,14 +21,17 @@ double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { // We don't have to store the Y_hat as it is not used in the backward pass. Instead, we store the softmax(Y_hat) // Note that this->Y_cache_ is just a vector with label, and it is not a matrix with one-hot vectors. - if (Y.ndim() == 2) { + if (Y.ndim() == 2) + { // In this case, we assume Y is a matrix of one-hot vectors. 
So we can just store the index of the correct label this->Y_cache_ = Y.argmax().dtype(); } - else if (Y.ndim() == 1) { + else if (Y.ndim() == 1) + { this->Y_cache_ = Y; } - else { + else + { throw std::runtime_error("Currently, Cross Entropy Loss does not support label with more than 2 dimensions."); } @@ -41,7 +46,8 @@ double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { // sum up all the elements double loss_without_factor = 0.0f; - for (int i = 0; i < B; ++i) { + for (int i = 0; i < B; ++i) + { // Y_{ij} * log(softmax(Y_hat_{ij})) loss_without_factor += log(softmax_Y_hat[i, static_cast(this->Y_cache_[i])]); } @@ -49,7 +55,8 @@ double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { return loss_without_factor * factor; } -Tensor<> CrossEntropyLoss::backward() { +Tensor<> CrossEntropyLoss::backward() +{ /* dL/dY_hat should have the same shape as Y_hat @@ -69,7 +76,8 @@ Tensor<> CrossEntropyLoss::backward() { Since Y is a matrix of one-hot vectors, only the correct label is 1 and the rest are 0 */ - for (int i = 0; i < B; ++i) { + for (int i = 0; i < B; ++i) + { grad_output[i, static_cast(this->Y_cache_[i])] -= 1.0f; } diff --git a/src/utils/utils.cpp b/src/utils/utils.cpp index 6a90dfe..e4de46b 100644 --- a/src/utils/utils.cpp +++ b/src/utils/utils.cpp @@ -1,15 +1,17 @@ #include "utils.hpp" -void print_training_stats(int batch, float loss, float accuracy) { +void print_stats(int batch, float loss, float accuracy) +{ cout << "\rBatch " << setw(4) << batch << " " - << "Loss: " << fixed << setprecision(5) << setw(8) << loss << " " - << "Accuracy: " << fixed << setprecision(2) << setw(6) << accuracy * 100 << "%" - << flush; + << "Loss: " << fixed << setprecision(5) << setw(8) << loss << " " + << "Accuracy: " << fixed << setprecision(2) << setw(6) << accuracy * 100 << "%" + << flush; } -void print_training_stats_line(int batch, float loss, float accuracy) { +void print_stats_line(int batch, float loss, float accuracy) +{ cout << "Batch " << setw(4) << batch << " " - << "Loss: " << fixed << setprecision(5) << setw(8) << loss << " " - << "Accuracy: " << fixed << setprecision(2) << setw(6) << accuracy * 100 << "%" - << endl; + << "Loss: " << fixed << setprecision(5) << setw(8) << loss << " " + << "Accuracy: " << fixed << setprecision(2) << setw(6) << accuracy * 100 << "%" + << endl; } \ No newline at end of file From 244cbcc9c1e91cc9c6263d16aa38f4a37aae42a8 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Sun, 9 Feb 2025 17:05:05 +0800 Subject: [PATCH 19/36] fix: remove grad initialization in the constructor --- src/modules/layers/linear.cpp | 45 +++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/src/modules/layers/linear.cpp b/src/modules/layers/linear.cpp index e033725..78838b7 100644 --- a/src/modules/layers/linear.cpp +++ b/src/modules/layers/linear.cpp @@ -3,40 +3,40 @@ #include "linear.hpp" using namespace nn; -Linear::Linear(size_t in_features, size_t out_features, bool bias) : in_features_(in_features), out_features_(out_features), use_bias_(bias) { +Linear::Linear(size_t in_features, size_t out_features, bool bias) : in_features_(in_features), out_features_(out_features), use_bias_(bias) +{ this->weight_ = Tensor<>({in_features, out_features}, 0.0f); - - if (this->use_bias_) { + + if (this->use_bias_) + { this->bias_ = Tensor<>({out_features, 1}, 0.0f); } - else { - this->bias_ = Tensor<>(); - } // randomize the weights. The bias is originally 0. 
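    // Illustrative note on the call below: randomizeParams(), defined further down in this
    // file, draws the weights from the Xavier/Glorot uniform range
    //   U(-limit, +limit)  with  limit = sqrt(6 / (in_features + out_features)),
    // e.g. for a 784 -> 128 layer, limit = sqrt(6 / 912), which is roughly 0.081.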
this->randomizeParams(); - this->grad_weight_ = Tensor<>({in_features, out_features}, 0.0f);; - this->grad_bias_ = Tensor<>({out_features, 1}, 0.0f); - cout << "Linear layer initialized with in_features = " << in_features << " and out_features = " << out_features << endl; cout << &this->input_cache_ << endl; } -Tensor<> Linear::forward(const Tensor<>& input) { +Tensor<> Linear::forward(const Tensor<> &input) +{ this->input_cache_ = input; size_t batchSize = input.shapes()[0]; - const Tensor<>& XW = input.matmul(this->weight_); + const Tensor<> &XW = input.matmul(this->weight_); - if (!this->use_bias_) { + if (!this->use_bias_) + { return XW; } Tensor<> biases_repeated = Tensor<>({batchSize, this->out_features_}, 0.0f); - for (size_t i = 0; i < batchSize; i++) { - for (size_t j = 0; j < this->out_features_; j++) { + for (size_t i = 0; i < batchSize; i++) + { + for (size_t j = 0; j < this->out_features_; j++) + { biases_repeated[i, j] = this->bias_[j, 0]; } } @@ -44,7 +44,8 @@ Tensor<> Linear::forward(const Tensor<>& input) { return XW + biases_repeated; } -Tensor<> Linear::backward(const Tensor<>& grad_output) { +Tensor<> Linear::backward(const Tensor<> &grad_output) +{ // dL/dY = grad_output // dL/dW = X^T * dL/dY @@ -61,7 +62,7 @@ Tensor<> Linear::backward(const Tensor<>& grad_output) { dL/db = dL/dY^T * 1_B (1_B is a vector of ones of size batchSize) dL/db = dL/dY.sum(axis=0) */ - if (this->use_bias_) + if (this->use_bias_) this->grad_bias_ = grad_output.transpose().matmul(Tensor<>({grad_output.shapes()[0], 1}, 1.0f)); // cout << endl << "dL/db: " << endl; @@ -71,7 +72,8 @@ Tensor<> Linear::backward(const Tensor<>& grad_output) { return grad_input; } -void Linear::update_params(const float lr) { +void Linear::update_params(const float lr) +{ this->weight_ -= this->grad_weight_ * lr; this->bias_ -= this->grad_bias_ * lr; @@ -79,7 +81,8 @@ void Linear::update_params(const float lr) { return; } -void Linear::randomizeParams() { +void Linear::randomizeParams() +{ // Calculate the limit for the uniform distribution double limit = sqrt(6.0f / (this->in_features_ + this->out_features_)); @@ -91,8 +94,10 @@ void Linear::randomizeParams() { cout << "Starting randomization" << endl; // Xavier initialization - for (size_t i = 0; i < this->in_features_; i++) { - for (size_t j = 0; j < this->out_features_; j++) { + for (size_t i = 0; i < this->in_features_; i++) + { + for (size_t j = 0; j < this->out_features_; j++) + { this->weight_[i, j] = dis(gen); } } From b194ac4549456c1f5016c41152282b7e5231ef67 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Mon, 10 Feb 2025 16:40:14 +0800 Subject: [PATCH 20/36] feat: finish the conv2d implementation --- include/modules/layers/conv2d.hpp | 16 +- include/utils/conv2d_utils.hpp | 32 +++- src/modules/layers/conv2d.cpp | 142 +++++++++-------- src/utils/conv2d_utils.cpp | 247 ++++++++++++++++++++++++++++++ 4 files changed, 361 insertions(+), 76 deletions(-) create mode 100644 src/utils/conv2d_utils.cpp diff --git a/include/modules/layers/conv2d.hpp b/include/modules/layers/conv2d.hpp index af2965e..c301ee2 100644 --- a/include/modules/layers/conv2d.hpp +++ b/include/modules/layers/conv2d.hpp @@ -1,10 +1,9 @@ +#pragma once #include #include "module.hpp" +#include "conv2d_utils.hpp" using namespace nn; -using int2 = std::pair; -using var_pair = std::variant; - namespace nn { @@ -17,16 +16,13 @@ namespace nn var_pair stride = 1, var_pair padding = 0, var_pair dilation = 1, + const string &padding_mode = "zeros", bool bias = true); virtual Tensor<> forward(const 
Tensor<> &input) override; virtual Tensor<> backward(const Tensor<> &grad_output) override; virtual void update_params(const float lr) override; - Tensor<> convolution(const int2 &stride, const int2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias); - - std::tuple calculate_output_shape(const Tensor<> &input); - private: int64_t in_channels_; int64_t out_channels_; @@ -35,10 +31,12 @@ namespace nn int2 padding_; int2 dilation_; bool use_bias_; + PaddingMode padding_mode_; + Padding padding_module_; vector original_input_shape_; Tensor<> weight_; Tensor<> bias_; - Tensor<> grad_weights_; - Tensor<> grad_biases_; + Tensor<> grad_weight_; + Tensor<> grad_bias_; }; } \ No newline at end of file diff --git a/include/utils/conv2d_utils.hpp b/include/utils/conv2d_utils.hpp index 40e3f05..dd70ddd 100644 --- a/include/utils/conv2d_utils.hpp +++ b/include/utils/conv2d_utils.hpp @@ -1,4 +1,34 @@ #include "tensor.hpp" using namespace std; -Tensor<> rotate_kernel(const Tensor<>& kernel); +using int2 = std::pair; +using var_pair = std::variant; + +enum class PaddingMode +{ + ZEROS, + REFLECT, + REPLICATE +}; + +class Padding +{ +public: + Padding() = default; + Padding(int2 padding, PaddingMode padding_mode) : padding_(padding), padding_mode_(padding_mode) {} + Tensor<> pad(const Tensor<> &input, const int2 &padding) const; + Tensor<> zero_pad(const Tensor<> &input, const int2 &padding) const; + +private: + int2 padding_; + PaddingMode padding_mode_; +}; + +Tensor<> +convolution(const int2 &stride, const int2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias); + +const vector calculate_output_shape(const vector &input_shape, const int64_t out_channel, const int2 &kernel_size, const int2 &stride, const int2 &padding, const int2 &dilation); + +Tensor<> flip_vertical_and_horizontal(const Tensor<> &input); + +Tensor<> dilate_input(const Tensor<> &input, const int2 &dilation); \ No newline at end of file diff --git a/src/modules/layers/conv2d.cpp b/src/modules/layers/conv2d.cpp index 6cc7fdf..30d06c2 100644 --- a/src/modules/layers/conv2d.cpp +++ b/src/modules/layers/conv2d.cpp @@ -7,6 +7,7 @@ Conv2d::Conv2d(int64_t in_channels, var_pair stride, var_pair padding, var_pair dilation, + const string &padding_mode, bool bias) { this->in_channels_ = in_channels; @@ -41,6 +42,16 @@ Conv2d::Conv2d(int64_t in_channels, this->padding_ = std::visit(process_variant, padding); this->dilation_ = std::visit(process_variant, dilation); + unordered_map all_padding_modes = {{"zeros", PaddingMode::ZEROS}, {"reflect", PaddingMode::REFLECT}, {"replicate", PaddingMode::REPLICATE}}; + + if (all_padding_modes.find(padding_mode) == all_padding_modes.end()) + { + throw std::invalid_argument("Padding mode must be one of 'zeros', 'reflect', or 'replicate'"); + } + + this->padding_mode_ = all_padding_modes[padding_mode]; + this->padding_module_ = Padding(this->padding_, this->padding_mode_); + vector weight_shape = {(size_t)this->out_channels_, (size_t)this->in_channels_, (size_t)this->kernel_size_.first, (size_t)this->kernel_size_.second}; this->weight_ = Tensor<>(weight_shape, 0.0); @@ -54,94 +65,93 @@ Conv2d::Conv2d(int64_t in_channels, Tensor<> Conv2d::forward(const Tensor<> &input) { + Tensor<> input_data = input; this->original_input_shape_ = input.shapes(); -} -Tensor<> Conv2d::backward(const Tensor<> &grad_output) -{ -} - -void Conv2d::update_params(const float lr) -{ -} + vector 
output_shape = calculate_output_shape(input.shapes(), this->out_channels_, this->kernel_size_, this->stride_, this->padding_, this->dilation_); -Tensor<> Conv2d::convolution(const int2 &stride, const int2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias) -{ - const vector &input_shape = input.shapes(); - const vector &kernel_shape = kernel.shapes(); - - const size_t B = output_shape[0]; - const size_t C_out = output_shape[1]; - const size_t H_out = output_shape[2]; - const size_t W_out = output_shape[3]; - - const size_t C_in = input_shape[1]; - const size_t H_in = input_shape[2]; - const size_t W_in = input_shape[3]; - - const size_t K_H = kernel_shape[2]; - const size_t K_W = kernel_shape[3]; - - Tensor<> output(output_shape, 0.0); - - /* - The logic behind is that - Let's us first focus on the first kernel among all out_channel kernels + if (this->padding_.first > 0 && this->padding_.second > 0) + { + input_data = this->padding_module_.pad(input_data, this->padding_); + } - Each input channel of the data is convolved with the same channel of the kernel, and the result is added to the output - Meaning that each input data channel only corresponds to the same channel of the kernel + // this input is the padded version of the original input + this->input_cache_ = input_data; - For example, the channel 1 of the input data is convolved with the channel 1 of the kernel, but it will not be convolved with the channel 2 of the kernel + return convolution(this->stride_, this->dilation_, output_shape, input_data, this->weight_, this->bias_, this->use_bias_); +} - After each input data channel convolving with the same channel of the kernel, element-wise addition is performed among all the convolved result with the first kernel +Tensor<> Conv2d::backward(const Tensor<> &grad_output) +{ + // dL_dY = grad_output - Now we get a single output channel + // dL_dW = conv(input_data, dL_dY) + Tensor<> permuted_input = this->input_cache_.permute({1, 0, 2, 3}); + Tensor<> permuted_grad_output = grad_output.permute({1, 0, 2, 3}); - We repeat this process for all the out_channel channels + this->grad_weight_ = convolution(this->dilation_, this->stride_, this->weight_.shapes(), permuted_input, permuted_grad_output, Tensor<>(), false); - And finally we will get an output with out_channel channels - */ + this->grad_weight_ = this->grad_weight_.permute({1, 0, 2, 3}); - for (size_t b = 0; b < B; ++b) + // dL_dB = sum(dL_dY, dims=(0, 2, 3)) + if (this->use_bias_) { - for (size_t c = 0; c < C_out; ++c) + this->grad_bias_ = Tensor<>({(size_t)this->out_channels_}, 0.0); + for (size_t i = 0; i < grad_output.shapes()[0]; i++) { - for (size_t h = 0; h < H_out; ++h) + for (size_t j = 0; j < grad_output.shapes()[1]; j++) { - for (size_t w = 0; w < W_out; ++w) + for (size_t k = 0; k < grad_output.shapes()[2]; k++) { - size_t h_start = h * stride.first; - size_t w_start = w * stride.second; - - for (size_t ic = 0; ic < C_in; ++ic) - { - for (size_t kh = 0; kh < K_H; ++kh) - { - for (size_t kw = 0; kw < K_W; ++kw) - { - size_t h_in = h_start + kh * dilation.first; - size_t w_in = w_start + kw * dilation.second; - - if (h_in >= 0 && h_in < H_in && w_in >= 0 && w_in < W_in) - { - output[b, c, h, w] += input[b, ic, h_in, w_in] * kernel[c, ic, kh, kw]; - } - } - } - } - - if (use_bias) + for (size_t l = 0; l < grad_output.shapes()[3]; l++) { - output[b, c, h, w] += bias[c]; + this->grad_bias_[j] += grad_output[i, j, k, l]; } } } } } - return output; + // dL_dX 
= fullconv(dL_dY, W) + Tensor<> flipped_weight = flip_vertical_and_horizontal(this->weight_); + Tensor<> permuted_flipped_weight = flipped_weight.permute({1, 0, 2, 3}); + + Tensor<> copy_grad_output = grad_output; + + if (this->stride_.first > 1 || this->stride_.second > 1) + { + copy_grad_output = dilate_input(copy_grad_output, this->stride_); + } + + const size_t H_further_pad = (this->kernel_size_.first - 1) * this->dilation_.first - this->padding_.first; + const size_t W_further_pad = (this->kernel_size_.second - 1) * this->dilation_.second - this->padding_.second; + + if (H_further_pad > 0 && W_further_pad > 0) + { + copy_grad_output = this->padding_module_.pad(copy_grad_output, {H_further_pad, W_further_pad}); + } + else if (H_further_pad < 0 && W_further_pad < 0) + { + permuted_flipped_weight = this->padding_module_.pad(permuted_flipped_weight, {-H_further_pad, -W_further_pad}); + } + else + { + throw std::invalid_argument("The further padding for dL/dX is not correct"); + } + + Tensor<> grad_input = convolution({1, 1}, this->dilation_, this->original_input_shape_, copy_grad_output, permuted_flipped_weight, Tensor<>(), false); + + return grad_input; } -std::tuple Conv2d::calculate_output_shape(const Tensor<> &input) +void Conv2d::update_params(const float lr) { + this->weight_ -= this->grad_weight_ * lr; + + if (this->use_bias_) + { + this->bias_ -= this->grad_bias_ * lr; + } + + return; } diff --git a/src/utils/conv2d_utils.cpp b/src/utils/conv2d_utils.cpp new file mode 100644 index 0000000..4f161a0 --- /dev/null +++ b/src/utils/conv2d_utils.cpp @@ -0,0 +1,247 @@ +#include "conv2d_utils.hpp" + +Tensor<> Padding::pad(const Tensor<> &input, const int2 &padding) const +{ + switch (this->padding_mode_) + { + case PaddingMode::ZEROS: + return this->zero_pad(input, padding); + break; + default: + throw std::invalid_argument("Invalid padding mode"); + } +} + +Tensor<> Padding::zero_pad(const Tensor<> &input, const int2 &padding) const +{ + const vector &input_shape = input.shapes(); + + if (input_shape.size() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + + const size_t B = input_shape[0]; + const size_t C = input_shape[1]; + const size_t H = input_shape[2]; + const size_t W = input_shape[3]; + + const size_t padded_H = H + padding.first * 2; + const size_t padded_W = W + padding.second * 2; + + Tensor<> padded_output({B, C, padded_H, padded_W}, 0.0); + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C; ++c) + { + for (size_t h = 0; h < H; ++h) + { + for (size_t w = 0; w < W; ++w) + { + padded_output[b, c, h + padding.first, w + padding.second] = input[b, c, h, w]; + } + } + } + } + + return padded_output; +} + +Tensor<> convolution(const int2 &stride, const int2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias) +{ + const vector &input_shape = input.shapes(); + const vector &kernel_shape = kernel.shapes(); + + if (output_shape.size() != 4) + { + throw std::invalid_argument("Output shape must be 4D"); + } + if (input_shape.size() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + if (kernel_shape.size() != 4) + { + throw std::invalid_argument("Kernel shape must be 4D"); + } + + const size_t B = output_shape[0]; + const size_t C_out = output_shape[1]; + const size_t H_out = output_shape[2]; + const size_t W_out = output_shape[3]; + + const size_t C_in = input_shape[1]; + const size_t H_in = input_shape[2]; + const size_t W_in = input_shape[3]; + + const 
size_t K_H = kernel_shape[2]; + const size_t K_W = kernel_shape[3]; + + Tensor<> output(output_shape, 0.0); + + /* + The logic behind is that + Let's us first focus on the first kernel among all out_channel kernels + + Each input channel of the data is convolved with the same channel of the kernel, and the result is added to the output + Meaning that each input data channel only corresponds to the same channel of the kernel + + For example, the channel 1 of the input data is convolved with the channel 1 of the kernel, but it will not be convolved with the channel 2 of the kernel + + After each input data channel convolving with the same channel of the kernel, element-wise addition is performed among all the convolved result with the first kernel + + Now we get a single output channel + + We repeat this process for all the out_channel channels + + And finally we will get an output with out_channel channels + */ + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C_out; ++c) + { + for (size_t h = 0; h < H_out; ++h) + { + for (size_t w = 0; w < W_out; ++w) + { + size_t h_start = h * stride.first; + size_t w_start = w * stride.second; + + for (size_t ic = 0; ic < C_in; ++ic) + { + for (size_t kh = 0; kh < K_H; ++kh) + { + for (size_t kw = 0; kw < K_W; ++kw) + { + size_t h_in = h_start + kh * dilation.first; + size_t w_in = w_start + kw * dilation.second; + + if (h_in >= 0 && h_in < H_in && w_in >= 0 && w_in < W_in) + { + output[b, c, h, w] += input[b, ic, h_in, w_in] * kernel[c, ic, kh, kw]; + } + } + } + } + + if (use_bias) + { + output[b, c, h, w] += bias[c]; + } + } + } + } + } + + return output; +} + +/** + * Calculate the output shape of a 2D convolutional layer. + * + * @param input_shape The shape of the input tensor, which is a 4D tensor with shape (B, C_in, H_in, W_in). + * @param out_channel The number of output channels. + * @param kernel_size The size of the kernel, which is a 2D integer pair. + * @param stride The stride of the convolution, which is a 2D integer pair. + * @param padding The padding of the convolution, which is a 2D integer pair. + * @param dilation The dilation of the convolution, which is a 2D integer pair. + * @return The output shape, which is a 4D vector with shape (B, out_channel, H_out, W_out). + * + * @throws std::invalid_argument if input_shape is not 4D or if the output shape is invalid. 
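+ *
+ * Worked example (illustrative, using the values declared for the first Conv2d in examples/test_conv2d.cpp):
+ * with H_in = 15, padding = 3, dilation = 2, kernel_size = 3 and stride = 2,
+ * H_out = (15 + 2*3 - 2*(3 - 1) - 1) / 2 + 1 = 16 / 2 + 1 = 9.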
+ */ +const vector calculate_output_shape(const vector &input_shape, const int64_t out_channel, const int2 &kernel_size, const int2 &stride, const int2 &padding, const int2 &dilation) +{ + if (input_shape.size() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + + const size_t B = input_shape[0]; + const size_t H_in = input_shape[2]; + const size_t W_in = input_shape[3]; + + const int64_t H_out = (H_in + 2 * padding.first - dilation.first * (kernel_size.first - 1) - 1) / stride.first + 1; + const int64_t W_out = (W_in + 2 * padding.second - dilation.second * (kernel_size.second - 1) - 1) / stride.second + 1; + + if (H_out <= 0 || W_out <= 0) + { + throw std::invalid_argument("Invalid output shape"); + } + + return {B, (size_t)out_channel, (size_t)H_out, (size_t)W_out}; +} + +Tensor<> flip_vertical_and_horizontal(const Tensor<> &input) +{ + if (input.ndim() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + + Tensor<> output(input.shapes(), 0.0); + + const size_t B = input.shapes()[0]; + const size_t C = input.shapes()[1]; + const size_t H = input.shapes()[2]; + const size_t W = input.shapes()[3]; + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C; ++c) + { + for (size_t h = 0; h < H / 2; ++h) + { + for (size_t w = 0; w < W; ++w) + { + output[b, c, h, w] = input[b, c, H - h - 1, w]; + output[b, c, H - h - 1, w] = input[b, c, h, w]; + } + } + for (size_t h = 0; h < H; ++h) + { + for (size_t w = 0; w < W / 2; ++w) + { + output[b, c, h, w] = input[b, c, h, W - w - 1]; + output[b, c, h, W - w - 1] = input[b, c, h, w]; + } + } + } + } + + return output; +} +Tensor<> dilate_input(const Tensor<> &input, const int2 &dilation) +{ + if (input.ndim() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + + const size_t B = input.shapes()[0]; + const size_t C = input.shapes()[1]; + const size_t H = input.shapes()[2]; + const size_t W = input.shapes()[3]; + + const size_t H_dilated = H + (H - 1) * (dilation.first - 1); + const size_t W_dilated = W + (W - 1) * (dilation.second - 1); + + Tensor<> dilated_input({B, C, H_dilated, W_dilated}, 0.0); + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C; ++c) + { + for (size_t h = 0; h < H; ++h) + { + for (size_t w = 0; w < W; ++w) + { + dilated_input[b, c, h * dilation.first, w * dilation.second] = input[b, c, h, w]; + } + } + } + } + + return dilated_input; +} \ No newline at end of file From 267b48182d162431b3cae33950e2a1df368c2876 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Mon, 10 Feb 2025 16:40:35 +0800 Subject: [PATCH 21/36] refractor: remove extra stride --- include/core/tensor.hpp | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/include/core/tensor.hpp b/include/core/tensor.hpp index 02f21f7..afd29a4 100644 --- a/include/core/tensor.hpp +++ b/include/core/tensor.hpp @@ -209,7 +209,6 @@ class Tensor void calculate_strides() { this->strides_.resize(this->ndim(), 0); - vector strides(this->ndim()); int64_t stride = 1; @@ -245,13 +244,6 @@ class Tensor return {a_offset, b_offset}; } - // Declare friendship so that TensorView can access private members of Tensor - template - friend Tensor dtype_impl(const Tensor &tensor); - -public: - Tensor() = default; - // Helper to recursively flatten nested vectors and compute shapes template void flatten_vector(const std::vector &vec, size_t depth = 0) @@ -304,6 +296,13 @@ class Tensor } } + // Declare friendship so that TensorView can access private members of Tensor + template + 
friend Tensor dtype_impl(const Tensor &tensor); + +public: + Tensor() = default; + // Constructor for nested vectors template Tensor(const std::vector &input) @@ -648,7 +647,7 @@ class Tensor return result; } - Tensor permute(const initializer_list &dims) + Tensor permute(const initializer_list &dims) const { size_t ndim = this->ndim(); @@ -671,11 +670,11 @@ class Tensor seen_dims.insert(dim); } - vector new_shapes(ndim); - vector new_strides(ndim); + vector new_shapes(ndim); + vector new_strides(ndim); - int64_t i = 0; - for (int64_t dim : dims) + size_t i = 0; + for (size_t dim : dims) { if (dim >= ndim) { From 190270feb987e3aeb53fdc6081473479e7bbab96 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Mon, 10 Feb 2025 16:40:55 +0800 Subject: [PATCH 22/36] fix: add pragma once to prevent re-load --- include/utils/utils.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/utils/utils.hpp b/include/utils/utils.hpp index 27bd80d..efdd871 100644 --- a/include/utils/utils.hpp +++ b/include/utils/utils.hpp @@ -1,3 +1,4 @@ +#pragma once #include #include using namespace std; From f5f07494e5a27e1c174233ef892869f1914c0732 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Mon, 10 Feb 2025 16:41:08 +0800 Subject: [PATCH 23/36] fix: add conv2d_utils dependency --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 55db136..f483866 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ set(SOURCE_FILES src/core/module.cpp src/modules/layers/linear.cpp src/modules/layers/conv2d.cpp + src/utils/conv2d_utils.cpp src/modules/layers/dropout.cpp src/modules/layers/conv2d.cpp src/modules/losses/mse.cpp From 21da4ca763b1306adac7963ceeb8f51d5fcdf7e9 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Wed, 12 Feb 2025 21:53:43 +0800 Subject: [PATCH 24/36] refractor: add conv2d playground --- examples/test_conv2d.cpp | 139 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 2 deletions(-) diff --git a/examples/test_conv2d.cpp b/examples/test_conv2d.cpp index bb8de4a..3287a4e 100644 --- a/examples/test_conv2d.cpp +++ b/examples/test_conv2d.cpp @@ -3,7 +3,142 @@ using namespace nn; int main() { - Conv2d conv2d(1, 1, 3); - cout << "Conv2d layer initialized with in_channels = 1 and out_channels = 1" << endl; + size_t batch_size = 1; + size_t in_channels = 4; + size_t out_channels = 8; + size_t weight_size = 3; + size_t input_data_size = 15; + size_t padding = 3; + size_t stride = 2; + size_t dilation = 2; + string padding_mode = "zeros"; + bool use_bias = true; + + size_t out_channels_2 = 7; + size_t weight_size_2 = 3; + size_t stride_2 = 2; + size_t padding_2 = 3; + size_t dilation_2 = 4; + string padding_mode_2 = "zeros"; + bool use_bias_2 = true; + + Tensor<> test_weight = Tensor<>({out_channels, in_channels, weight_size, weight_size}, 0.0f); + Tensor<> test_bias = Tensor<>({out_channels}, 0.0f); + + size_t val = 1; + for (size_t i = 0; i < out_channels; i++) + { + for (size_t j = 0; j < in_channels; j++) + { + for (size_t k = 0; k < weight_size; k++) + { + for (size_t l = 0; l < weight_size; l++) + { + test_weight[i, j, k, l] = val; + val++; + } + } + } + } + + val = 1; + for (size_t i = 0; i < out_channels; i++) + { + test_bias[i] = val; + val++; + } + + Tensor<> test_weight_2 = Tensor<>({out_channels_2, out_channels, weight_size_2, weight_size_2}, 0.0f); + Tensor<> test_bias_2 = Tensor<>({out_channels_2}, 0.0f); + + double val_2 = 0.1; + for (size_t i = 0; i < out_channels_2; i++) + { + for (size_t j = 0; j < 
out_channels; j++) + { + for (size_t k = 0; k < weight_size_2; k++) + { + for (size_t l = 0; l < weight_size_2; l++) + { + test_weight_2[i, j, k, l] = val_2; + val_2 += 0.1; + } + } + } + } + + val_2 = 0.1; + for (size_t i = 0; i < out_channels_2; i++) + { + test_bias_2[i] = val_2; + val_2 += 0.1; + } + + Tensor<> test_input = Tensor<>({batch_size, in_channels, input_data_size, input_data_size}, 0.0f); + + val = 1; + for (size_t i = 0; i < batch_size; i++) + { + for (size_t j = 0; j < in_channels; j++) + { + for (size_t k = 0; k < input_data_size; k++) + { + for (size_t l = 0; l < input_data_size; l++) + { + test_input[i, j, k, l] = val; + val++; + } + } + } + } + // cout << "Test weight: " << endl; + // test_weight.print(); + // cout << endl; + + cout << "Test bias: " << endl; + test_bias.print(); + cout << endl; + + Conv2d conv2d_1(in_channels, out_channels, weight_size, padding, stride, dilation, padding_mode, use_bias); + + cout << "Conv2d layer 1 initialized with in_channels = " << in_channels << " and out_channels = " << out_channels << endl; + + Conv2d conv2d_2(out_channels, out_channels_2, weight_size_2, padding_2, stride_2, dilation_2, padding_mode_2, use_bias_2); + + conv2d_1.set_weight(test_weight); + conv2d_1.set_bias(test_bias); + + conv2d_2.set_weight(test_weight_2); + conv2d_2.set_bias(test_bias_2); + + // cout << "Test input: " << endl; + // test_input.print(); + // cout << endl; + + Tensor<> output = conv2d_1(test_input); + Tensor<> output_2 = conv2d_2(output); + + cout << "Output: " << endl; + output.print(); + cout << endl; + + cout << "Output 2: " << endl; + output_2.print(); + cout << endl; + + cout << "output shape : "; + for (int i = 0; i < output.ndim(); i++) + { + cout << output.shapes()[i] << " "; + } + cout << endl; + + cout << "output_2 shape : "; + for (int i = 0; i < output_2.ndim(); i++) + { + cout << output_2.shapes()[i] << " "; + } + cout << endl; + return 0; } \ No newline at end of file From 6e33f4e87cfd2d3243c7529cd6aa29d353e69e43 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Wed, 12 Feb 2025 21:54:18 +0800 Subject: [PATCH 25/36] refractor: change from int64_t to size_t (probably will change it back) --- include/modules/layers/conv2d.hpp | 28 ++++++++++++--------- include/utils/conv2d_utils.hpp | 18 +++++++------- src/modules/layers/conv2d.cpp | 41 +++++++++++++++++-------------- src/utils/conv2d_utils.cpp | 19 ++++++++++---- 4 files changed, 63 insertions(+), 43 deletions(-) diff --git a/include/modules/layers/conv2d.hpp b/include/modules/layers/conv2d.hpp index c301ee2..6aeb14f 100644 --- a/include/modules/layers/conv2d.hpp +++ b/include/modules/layers/conv2d.hpp @@ -10,12 +10,12 @@ namespace nn class Conv2d : public Module { public: - Conv2d(int64_t in_channels, - int64_t out_channels, + Conv2d(size_t in_channels, + size_t out_channels, var_pair kernel_size, - var_pair stride = 1, - var_pair padding = 0, - var_pair dilation = 1, + var_pair stride = (size_t)1, + var_pair padding = (size_t)0, + var_pair dilation = (size_t)1, const string &padding_mode = "zeros", bool bias = true); @@ -23,13 +23,19 @@ namespace nn virtual Tensor<> backward(const Tensor<> &grad_output) override; virtual void update_params(const float lr) override; + void set_weight(const Tensor<> &target_weight) { this->weight_ = target_weight; } + void set_bias(const Tensor<> &target_bias) { this->bias_ = target_bias; } + + const Tensor<> &get_weight() const { return this->weight_; } + const Tensor<> &get_bias() const { return this->bias_; } + private: - int64_t in_channels_; - 
int64_t out_channels_; - int2 kernel_size_; - int2 stride_; - int2 padding_; - int2 dilation_; + size_t in_channels_; + size_t out_channels_; + size_tp2 kernel_size_; + size_tp2 stride_; + size_tp2 padding_; + size_tp2 dilation_; bool use_bias_; PaddingMode padding_mode_; Padding padding_module_; diff --git a/include/utils/conv2d_utils.hpp b/include/utils/conv2d_utils.hpp index dd70ddd..a53b9bd 100644 --- a/include/utils/conv2d_utils.hpp +++ b/include/utils/conv2d_utils.hpp @@ -1,8 +1,8 @@ #include "tensor.hpp" using namespace std; -using int2 = std::pair; -using var_pair = std::variant; +using size_tp2 = std::pair; +using var_pair = std::variant; enum class PaddingMode { @@ -15,20 +15,20 @@ class Padding { public: Padding() = default; - Padding(int2 padding, PaddingMode padding_mode) : padding_(padding), padding_mode_(padding_mode) {} - Tensor<> pad(const Tensor<> &input, const int2 &padding) const; - Tensor<> zero_pad(const Tensor<> &input, const int2 &padding) const; + Padding(size_tp2 padding, PaddingMode padding_mode) : padding_(padding), padding_mode_(padding_mode) {} + Tensor<> pad(const Tensor<> &input, const size_tp2 &padding) const; + Tensor<> zero_pad(const Tensor<> &input, const size_tp2 &padding) const; private: - int2 padding_; + size_tp2 padding_; PaddingMode padding_mode_; }; Tensor<> -convolution(const int2 &stride, const int2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias); +convolution(const size_tp2 &stride, const size_tp2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias); -const vector calculate_output_shape(const vector &input_shape, const int64_t out_channel, const int2 &kernel_size, const int2 &stride, const int2 &padding, const int2 &dilation); +const vector calculate_output_shape(const vector &input_shape, const int64_t out_channel, const size_tp2 &kernel_size, const size_tp2 &stride, const size_tp2 &padding, const size_tp2 &dilation); Tensor<> flip_vertical_and_horizontal(const Tensor<> &input); -Tensor<> dilate_input(const Tensor<> &input, const int2 &dilation); \ No newline at end of file +Tensor<> dilate_input(const Tensor<> &input, const size_tp2 &dilation); \ No newline at end of file diff --git a/src/modules/layers/conv2d.cpp b/src/modules/layers/conv2d.cpp index 30d06c2..5098446 100644 --- a/src/modules/layers/conv2d.cpp +++ b/src/modules/layers/conv2d.cpp @@ -1,11 +1,11 @@ #include "conv2d.hpp" using namespace nn; -Conv2d::Conv2d(int64_t in_channels, - int64_t out_channels, +Conv2d::Conv2d(size_t in_channels, + size_t out_channels, var_pair kernel_size, - var_pair stride, var_pair padding, + var_pair stride, var_pair dilation, const string &padding_mode, bool bias) @@ -15,10 +15,10 @@ Conv2d::Conv2d(int64_t in_channels, this->use_bias_ = bias; // Helper lambda to process variant parameters - auto process_variant = [](auto &&arg) -> int2 + auto process_variant = [](auto &&arg) -> size_tp2 { using T = std::decay_t; - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) { if (arg < 0) { @@ -28,20 +28,23 @@ Conv2d::Conv2d(int64_t in_channels, } else { - static_assert(std::is_same_v>, "Unexpected type in variant"); - if (arg.first < 0 || arg.second < 0) - { - throw std::invalid_argument("Negative kernel size, stride, padding, or dilation is not supported"); - } + static_assert(std::is_same_v, "Unexpected type in variant"); return arg; } }; + // Set kernel size, stride, padding, and dilation 
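+    // Each of these parameters is a var_pair: a single size_t gives a symmetric value for both the
+    // height and width directions, while an explicit (height, width) pair sets them independently;
+    // process_variant above normalises both forms to a size_tp2.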
this->kernel_size_ = std::visit(process_variant, kernel_size); this->stride_ = std::visit(process_variant, stride); this->padding_ = std::visit(process_variant, padding); this->dilation_ = std::visit(process_variant, dilation); + cout << "Kernel Size : " << this->kernel_size_.first << ", " << this->kernel_size_.second << endl; + cout << "Stride : " << this->stride_.first << ", " << this->stride_.second << endl; + cout << "Padding : " << this->padding_.first << ", " << this->padding_.second << endl; + cout << "Dilation : " << this->dilation_.first << ", " << this->dilation_.second << endl; + + // Check if padding mode is valid unordered_map all_padding_modes = {{"zeros", PaddingMode::ZEROS}, {"reflect", PaddingMode::REFLECT}, {"replicate", PaddingMode::REPLICATE}}; if (all_padding_modes.find(padding_mode) == all_padding_modes.end()) @@ -49,16 +52,18 @@ Conv2d::Conv2d(int64_t in_channels, throw std::invalid_argument("Padding mode must be one of 'zeros', 'reflect', or 'replicate'"); } + // Set padding mode this->padding_mode_ = all_padding_modes[padding_mode]; this->padding_module_ = Padding(this->padding_, this->padding_mode_); - vector weight_shape = {(size_t)this->out_channels_, (size_t)this->in_channels_, (size_t)this->kernel_size_.first, (size_t)this->kernel_size_.second}; + // Initialize weights and bias + vector weight_shape = {this->out_channels_, this->in_channels_, this->kernel_size_.first, this->kernel_size_.second}; this->weight_ = Tensor<>(weight_shape, 0.0); if (this->use_bias_) { - vector bias_shape = {(size_t)this->out_channels_}; + vector bias_shape = {this->out_channels_}; this->bias_ = Tensor<>(bias_shape, 0.0); } } @@ -68,7 +73,7 @@ Tensor<> Conv2d::forward(const Tensor<> &input) Tensor<> input_data = input; this->original_input_shape_ = input.shapes(); - vector output_shape = calculate_output_shape(input.shapes(), this->out_channels_, this->kernel_size_, this->stride_, this->padding_, this->dilation_); + const vector &output_shape = calculate_output_shape(input.shapes(), this->out_channels_, this->kernel_size_, this->stride_, this->padding_, this->dilation_); if (this->padding_.first > 0 && this->padding_.second > 0) { @@ -86,17 +91,17 @@ Tensor<> Conv2d::backward(const Tensor<> &grad_output) // dL_dY = grad_output // dL_dW = conv(input_data, dL_dY) - Tensor<> permuted_input = this->input_cache_.permute({1, 0, 2, 3}); - Tensor<> permuted_grad_output = grad_output.permute({1, 0, 2, 3}); + Tensor<> permuted_input = this->input_cache_.permute(1, 0, 2, 3); + Tensor<> permuted_grad_output = grad_output.permute(1, 0, 2, 3); this->grad_weight_ = convolution(this->dilation_, this->stride_, this->weight_.shapes(), permuted_input, permuted_grad_output, Tensor<>(), false); - this->grad_weight_ = this->grad_weight_.permute({1, 0, 2, 3}); + this->grad_weight_ = this->grad_weight_.permute(1, 0, 2, 3); // dL_dB = sum(dL_dY, dims=(0, 2, 3)) if (this->use_bias_) { - this->grad_bias_ = Tensor<>({(size_t)this->out_channels_}, 0.0); + this->grad_bias_ = Tensor<>({this->out_channels_}, 0.0); for (size_t i = 0; i < grad_output.shapes()[0]; i++) { for (size_t j = 0; j < grad_output.shapes()[1]; j++) @@ -114,7 +119,7 @@ Tensor<> Conv2d::backward(const Tensor<> &grad_output) // dL_dX = fullconv(dL_dY, W) Tensor<> flipped_weight = flip_vertical_and_horizontal(this->weight_); - Tensor<> permuted_flipped_weight = flipped_weight.permute({1, 0, 2, 3}); + Tensor<> permuted_flipped_weight = flipped_weight.permute(1, 0, 2, 3); Tensor<> copy_grad_output = grad_output; diff --git 
a/src/utils/conv2d_utils.cpp b/src/utils/conv2d_utils.cpp index 4f161a0..21069f3 100644 --- a/src/utils/conv2d_utils.cpp +++ b/src/utils/conv2d_utils.cpp @@ -1,6 +1,6 @@ #include "conv2d_utils.hpp" -Tensor<> Padding::pad(const Tensor<> &input, const int2 &padding) const +Tensor<> Padding::pad(const Tensor<> &input, const size_tp2 &padding) const { switch (this->padding_mode_) { @@ -12,7 +12,7 @@ Tensor<> Padding::pad(const Tensor<> &input, const int2 &padding) const } } -Tensor<> Padding::zero_pad(const Tensor<> &input, const int2 &padding) const +Tensor<> Padding::zero_pad(const Tensor<> &input, const size_tp2 &padding) const { const vector &input_shape = input.shapes(); @@ -48,7 +48,7 @@ Tensor<> Padding::zero_pad(const Tensor<> &input, const int2 &padding) const return padded_output; } -Tensor<> convolution(const int2 &stride, const int2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias) +Tensor<> convolution(const size_tp2 &stride, const size_tp2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias) { const vector &input_shape = input.shapes(); const vector &kernel_shape = kernel.shapes(); @@ -151,7 +151,7 @@ Tensor<> convolution(const int2 &stride, const int2 &dilation, const vector calculate_output_shape(const vector &input_shape, const int64_t out_channel, const int2 &kernel_size, const int2 &stride, const int2 &padding, const int2 &dilation) +const vector calculate_output_shape(const vector &input_shape, const int64_t out_channel, const size_tp2 &kernel_size, const size_tp2 &stride, const size_tp2 &padding, const size_tp2 &dilation) { if (input_shape.size() != 4) { @@ -162,6 +162,15 @@ const vector calculate_output_shape(const vector &input_shape, c const size_t H_in = input_shape[2]; const size_t W_in = input_shape[3]; + cout << "Btach Size : " << B << endl; + cout << "H_in : " << H_in << endl; + cout << "W_in : " << W_in << endl; + cout << "Out Channel : " << out_channel << endl; + cout << "Kernel Size : " << kernel_size.first << ", " << kernel_size.second << endl; + cout << "Stride : " << stride.first << ", " << stride.second << endl; + cout << "Padding : " << padding.first << ", " << padding.second << endl; + cout << "Dilation : " << dilation.first << ", " << dilation.second << endl; + const int64_t H_out = (H_in + 2 * padding.first - dilation.first * (kernel_size.first - 1) - 1) / stride.first + 1; const int64_t W_out = (W_in + 2 * padding.second - dilation.second * (kernel_size.second - 1) - 1) / stride.second + 1; @@ -212,7 +221,7 @@ Tensor<> flip_vertical_and_horizontal(const Tensor<> &input) return output; } -Tensor<> dilate_input(const Tensor<> &input, const int2 &dilation) +Tensor<> dilate_input(const Tensor<> &input, const size_tp2 &dilation) { if (input.ndim() != 4) { From e2b5cd3aac8516da214ece18d3f0d3ffbf42032f Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Wed, 12 Feb 2025 21:54:45 +0800 Subject: [PATCH 26/36] fix: fix the scalar mul by using copy constructor --- include/core/tensor.hpp | 50 +++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/include/core/tensor.hpp b/include/core/tensor.hpp index afd29a4..4687296 100644 --- a/include/core/tensor.hpp +++ b/include/core/tensor.hpp @@ -472,11 +472,11 @@ class Tensor // Multiply all elements of tensor with the given scaler Tensor mul(const T &scaler) const { - Tensor result(this->shapes_, static_cast(0)); + Tensor result = 
*this; for (size_t i = 0; i < this->size(); i++) { - (*result.data_)[i] = (*this->data_)[i] * scaler; + (*result.data_)[i] *= scaler; } return result; } @@ -647,19 +647,22 @@ class Tensor return result; } - Tensor permute(const initializer_list &dims) const + template + Tensor permute(Dims... dims) const { + vector perm_dims = {static_cast(dims)...}; + size_t ndim = this->ndim(); - if (dims.size() != ndim) + if (perm_dims.size() != ndim) { throw std::invalid_argument("Number of dimensions in permutation must match tensor's number of dimensions"); } - unordered_set seen_dims; - for (int64_t dim : dims) + unordered_set seen_dims; + for (size_t dim : perm_dims) { - if (dim < 0 || dim >= ndim) + if (dim >= ndim) { throw out_of_range("Permute dimension out of range"); } @@ -674,7 +677,7 @@ class Tensor vector new_strides(ndim); size_t i = 0; - for (size_t dim : dims) + for (size_t dim : perm_dims) { if (dim >= ndim) { @@ -899,7 +902,36 @@ class Tensor return result; } - vector to_vector() const { return (*this->data_); } + static Tensor arange(size_t start, size_t end = 0, vector shape = {0}) + { + if (start == end) // if only one argument is provided + { + throw runtime_error("arange() missing required argument: 'end'"); + } + if (end == 0) + { + end = start; + start = 0; + } + + if (shape.size() == 1 && shape[0] <= 0) + { + shape[0] = end - start + 1; + } + + Tensor result(shape, static_cast(0)); + + cout << "In arange, weight address: " << &result.data_ << endl; + + size_t idx = 0; + for (size_t i = start; i <= end; i++) + { + (*result.data_)[idx] = static_cast(i); + idx++; + } + + return result; + } // Get the dimension of the tensor inline size_t ndim() const From 160be1b1f905aa21c028febe81f48743d3fb1d2c Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Wed, 12 Feb 2025 21:54:55 +0800 Subject: [PATCH 27/36] refractor --- include/modules/layers/linear.hpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/include/modules/layers/linear.hpp b/include/modules/layers/linear.hpp index bd77598..f32ac8d 100644 --- a/include/modules/layers/linear.hpp +++ b/include/modules/layers/linear.hpp @@ -1,25 +1,27 @@ #pragma once #include "module.hpp" -namespace nn { +namespace nn +{ -class Linear : public Module{ + class Linear : public Module + { public: Linear(size_t in_features, size_t out_features, bool bias); - - virtual Tensor<> forward(const Tensor<>& input) override; - virtual Tensor<> backward(const Tensor<>& grad_output) override; + + virtual Tensor<> forward(const Tensor<> &input) override; + virtual Tensor<> backward(const Tensor<> &grad_output) override; virtual void update_params(const float lr) override; void randomizeParams(); // setters - inline void set_weights(const Tensor<>& target_weight) { this->weight_ = target_weight; }; - inline void set_biases(const Tensor<>& target_bias) { this->bias_ = target_bias; } + inline void set_weight(const Tensor<> &target_weight) { this->weight_ = target_weight; }; + inline void set_bias(const Tensor<> &target_bias) { this->bias_ = target_bias; } // getters - inline const Tensor<>& getWeights() const { return this->weight_; } - inline const Tensor<>& getBiases() const { return this->bias_; } + inline const Tensor<> &get_weight() const { return this->weight_; } + inline const Tensor<> &get_bias() const { return this->bias_; } private: size_t in_features_; @@ -29,6 +31,6 @@ class Linear : public Module{ Tensor<> bias_; Tensor<> grad_weight_; Tensor<> grad_bias_; - }; + }; } \ No newline at end of file From 
cc81046d12e64d06e9b927dfc80dbc817e8f1cf7 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Tue, 18 Feb 2025 23:32:38 +0800 Subject: [PATCH 28/36] feat: add parameters initialization --- include/modules/layers/conv2d.hpp | 2 ++ src/modules/layers/conv2d.cpp | 52 +++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/include/modules/layers/conv2d.hpp b/include/modules/layers/conv2d.hpp index 6aeb14f..1868be9 100644 --- a/include/modules/layers/conv2d.hpp +++ b/include/modules/layers/conv2d.hpp @@ -23,6 +23,8 @@ namespace nn virtual Tensor<> backward(const Tensor<> &grad_output) override; virtual void update_params(const float lr) override; + void reset_parameters(); + void set_weight(const Tensor<> &target_weight) { this->weight_ = target_weight; } void set_bias(const Tensor<> &target_bias) { this->bias_ = target_bias; } diff --git a/src/modules/layers/conv2d.cpp b/src/modules/layers/conv2d.cpp index 5098446..a633b4e 100644 --- a/src/modules/layers/conv2d.cpp +++ b/src/modules/layers/conv2d.cpp @@ -1,3 +1,5 @@ +#include +#include #include "conv2d.hpp" using namespace nn; @@ -66,6 +68,9 @@ Conv2d::Conv2d(size_t in_channels, vector bias_shape = {this->out_channels_}; this->bias_ = Tensor<>(bias_shape, 0.0); } + + // randomize the weights and bias based on PyTorch implementation + this->reset_parameters(); } Tensor<> Conv2d::forward(const Tensor<> &input) @@ -160,3 +165,50 @@ void Conv2d::update_params(const float lr) return; } + +void Conv2d::reset_parameters() +{ + /* + PyTorch implementation: + + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + */ + + size_t n = this->in_channels_; + n *= this->kernel_size_.first * this->kernel_size_.second; + + const double stdv = 1.0 / sqrt(n); + + // Set up the random number generator + random_device rd; + mt19937 gen(rd()); + uniform_real_distribution dis(-stdv, stdv); + + for (size_t i = 0; i < this->out_channels_; i++) + { + for (size_t j = 0; j < this->in_channels_; j++) + { + for (size_t k = 0; k < this->kernel_size_.first; k++) + { + for (size_t l = 0; l < this->kernel_size_.second; l++) + { + this->weight_[i, j, k, l] = dis(gen); + } + } + } + } + + if (this->use_bias_) + { + for (size_t i = 0; i < this->out_channels_; i++) + { + this->bias_[i] = dis(gen); + } + } +} From 89cf3512a600199d2703524be2b418d66b623fff Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Tue, 18 Feb 2025 23:33:06 +0800 Subject: [PATCH 29/36] refractor: change the parameter initialization function name --- include/modules/layers/linear.hpp | 2 +- src/modules/layers/linear.cpp | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/include/modules/layers/linear.hpp b/include/modules/layers/linear.hpp index f32ac8d..036ed50 100644 --- a/include/modules/layers/linear.hpp +++ b/include/modules/layers/linear.hpp @@ -13,7 +13,7 @@ namespace nn virtual Tensor<> backward(const Tensor<> &grad_output) override; virtual void update_params(const float lr) override; - void randomizeParams(); + void reset_parameters(); // setters inline void set_weight(const Tensor<> &target_weight) { this->weight_ = target_weight; }; diff --git a/src/modules/layers/linear.cpp b/src/modules/layers/linear.cpp index 78838b7..fa1ee40 100644 --- a/src/modules/layers/linear.cpp +++ b/src/modules/layers/linear.cpp @@ -12,8 +12,8 @@ Linear::Linear(size_t in_features, size_t out_features, bool bias) : 
in_features this->bias_ = Tensor<>({out_features, 1}, 0.0f); } - // randomize the weights. The bias is originally 0. - this->randomizeParams(); + // randomize the weights and bias based on PyTorch implementation + this->reset_parameters(); cout << "Linear layer initialized with in_features = " << in_features << " and out_features = " << out_features << endl; cout << &this->input_cache_ << endl; @@ -81,17 +81,24 @@ void Linear::update_params(const float lr) return; } -void Linear::randomizeParams() +void Linear::reset_parameters() { + /* + PyTorch implementation: + + stdv = 1. / math.sqrt(self.weight.size(1)) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + + */ // Calculate the limit for the uniform distribution - double limit = sqrt(6.0f / (this->in_features_ + this->out_features_)); + const double stdv = 1.0 / sqrt(this->weight_.shapes()[0]); // since the weight is transposed // Set up the random number generator random_device rd; mt19937 gen(rd()); - uniform_real_distribution dis(-limit, limit); - - cout << "Starting randomization" << endl; + uniform_real_distribution dis(-stdv, stdv); // Xavier initialization for (size_t i = 0; i < this->in_features_; i++) @@ -102,5 +109,11 @@ void Linear::randomizeParams() } } - cout << "Finished randomization" << endl; + if (this->use_bias_) + { + for (size_t i = 0; i < this->out_features_; i++) + { + this->bias_[i, 0] = dis(gen); + } + } } \ No newline at end of file From bcf962fc0835a66f7483b651f8ef005b35c91d0e Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Wed, 19 Feb 2025 00:08:08 +0800 Subject: [PATCH 30/36] feat: finish reshape after considering stride and permute --- include/core/tensor.hpp | 143 ++++++++++++++++++--------------- include/utils/tensor_utils.hpp | 3 +- 2 files changed, 82 insertions(+), 64 deletions(-) diff --git a/include/core/tensor.hpp b/include/core/tensor.hpp index 4687296..e567640 100644 --- a/include/core/tensor.hpp +++ b/include/core/tensor.hpp @@ -7,7 +7,7 @@ class Tensor { private: shared_ptr> data_ = nullptr; // data is stored as a 1D vector // shared_ptr is used to avoid copying data - vector shapes_; // store the dimensions of the tensor + vector shape_; // store the dimensions of the tensor vector strides_; // store the strides of the tensor size_t offset_ = 0; // offset for slicing mutable int64_t size_ = -1; // it can be changed by const member functions (in size() function) @@ -29,7 +29,7 @@ class Tensor const string indent_str(indent, ' '); // Handle empty dimensions - if (this->shapes_[dim] == 0) + if (this->shape_[dim] == 0) { cout << indent_str << "[]"; return; @@ -39,20 +39,20 @@ class Tensor if (dim == this->ndim() - 1) { // Last dimension - for (size_t i = 0; i < this->shapes_[dim]; ++i) + for (size_t i = 0; i < this->shape_[dim]; ++i) { cout << (*this->data_)[offset + i * this->strides_[dim]]; - if (i < this->shapes_[dim] - 1) + if (i < this->shape_[dim] - 1) cout << ", "; } } else { cout << "\n"; - for (size_t i = 0; i < this->shapes_[dim]; ++i) + for (size_t i = 0; i < this->shape_[dim]; ++i) { print_recursive_impl(dim + 1, offset + i * this->strides_[dim], indent + 2); - if (i < this->shapes_[dim] - 1) + if (i < this->shape_[dim] - 1) cout << ",\n"; } cout << "\n" @@ -75,7 +75,7 @@ class Tensor // Check bounds for (size_t i = 0; i < idxs.size(); ++i) { - size_t normalized_idx = normalize_index(idxs[i], this->shapes_[i]); + size_t normalized_idx = normalize_index(idxs[i], this->shape_[i]); normalized_idxs.push_back(normalized_idx); } @@ 
-102,8 +102,8 @@ class Tensor } // Determine tensor dimensions - const size_t num_rows = (ndim == 2) ? this->shapes_[0] : 1; - const size_t num_cols = (ndim == 2) ? this->shapes_[1] : this->shapes_[0]; + const size_t num_rows = (ndim == 2) ? this->shape_[0] : 1; + const size_t num_cols = (ndim == 2) ? this->shape_[1] : this->shape_[0]; vector result(num_rows); @@ -170,14 +170,14 @@ class Tensor Tensor arithmetic_operation_impl(ArithmeticOp op, const Tensor &other) const { - if (other.shapes_ != this->shapes_) + if (other.shape_ != this->shape_) { throw runtime_error("Shape mismatch in arithmetic operation"); } size_t ndim = this->ndim(); - Tensor result(this->shapes_, static_cast(0)); + Tensor result(this->shape_, static_cast(0)); // Precompute result's contiguous strides for index calculation const vector &result_strides = result.strides_; @@ -206,7 +206,7 @@ class Tensor } // Helper function to cacluate the stride of the tensor - void calculate_strides() + void compute_contiguous_strides() { this->strides_.resize(this->ndim(), 0); @@ -215,7 +215,7 @@ class Tensor for (int64_t i = this->ndim() - 1; i >= 0; --i) { this->strides_[i] = stride; - stride *= this->shapes_[i]; + stride *= this->shape_[i]; } } @@ -225,7 +225,7 @@ class Tensor size_t remaining = idx; - for (int dim = 0; dim < ndim; ++dim) + for (int64_t dim = 0; dim < ndim; ++dim) { indices[dim] = remaining / result_strides[dim]; remaining %= result_strides[dim]; @@ -235,7 +235,7 @@ class Tensor size_t a_offset = this->offset_; size_t b_offset = other.offset_; - for (int dim = 0; dim < ndim; ++dim) + for (int64_t dim = 0; dim < ndim; ++dim) { a_offset += indices[dim] * this->strides_[dim]; b_offset += indices[dim] * other.strides_[dim]; @@ -249,15 +249,15 @@ class Tensor void flatten_vector(const std::vector &vec, size_t depth = 0) { // Add current level's size to shapes - if (depth == this->shapes_.size()) + if (depth == this->shape_.size()) { // First encounter with this depth: record size - this->shapes_.push_back(vec.size()); + this->shape_.push_back(vec.size()); } else { // Verify size matches the existing dimension - if (vec.size() != this->shapes_[depth]) + if (vec.size() != this->shape_[depth]) { throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); } @@ -309,7 +309,7 @@ class Tensor { this->data_ = make_shared>(); flatten_vector(input); - this->calculate_strides(); + this->compute_contiguous_strides(); } // // Recursive helper to process nested initializer lists @@ -344,17 +344,17 @@ class Tensor // Scaler constructor Tensor(const T &value) { - this->shapes_ = vector{1}; + this->shape_ = vector{1}; this->data_ = make_shared>(1, value); - this->calculate_strides(); + this->compute_contiguous_strides(); } // 1D tensor constructor Tensor(const initializer_list &data_1d) { this->data_ = make_shared>(data_1d.begin(), data_1d.end()); - this->shapes_ = vector{data_1d.size()}; - this->calculate_strides(); + this->shape_ = vector{data_1d.size()}; + this->compute_contiguous_strides(); } // 2D tensor constructor @@ -362,7 +362,7 @@ class Tensor { const size_t n = data_2d.size(), m = data_2d.begin()->size(); - this->shapes_ = vector{n, m}; + this->shape_ = vector{n, m}; this->data_ = make_shared>(); this->data_->reserve(n * m); // Optimize memory allocation @@ -371,7 +371,7 @@ class Tensor { this->data_->insert(this->data_->end(), row.begin(), row.end()); } - this->calculate_strides(); + this->compute_contiguous_strides(); } // 3D tensor constructor @@ -379,7 +379,7 @@ class Tensor { const size_t n = 
data_3d.size(), m = data_3d.begin()->size(), l = data_3d.begin()->begin()->size(); - this->shapes_ = vector{n, m, l}; + this->shape_ = vector{n, m, l}; this->data_ = make_shared>(); this->data_->reserve(n * m * l); // Optimize memory allocation @@ -391,7 +391,7 @@ class Tensor this->data_->insert(this->data_->end(), row.begin(), row.end()); } } - this->calculate_strides(); + this->compute_contiguous_strides(); } // 4D tensor constructor @@ -399,7 +399,7 @@ class Tensor { const size_t n = data_4d.size(), m = data_4d.begin()->size(), l = data_4d.begin()->begin()->size(), k = data_4d.begin()->begin()->begin()->size(); - this->shapes_ = vector{n, m, l, k}; + this->shape_ = vector{n, m, l, k}; this->data_ = make_shared>(); this->data_->reserve(n * m * l * k); // Optimize memory allocation @@ -414,13 +414,13 @@ class Tensor } } } - this->calculate_strides(); + this->compute_contiguous_strides(); } // certin value constructor Tensor(const vector &shape, const T &value) { - this->shapes_ = shape; + this->shape_ = shape; size_t size = 1; for (const size_t &dim : shape) { @@ -428,7 +428,7 @@ class Tensor } this->data_ = make_shared>(size, value); - this->calculate_strides(); + this->compute_contiguous_strides(); } // copy constructor @@ -516,8 +516,8 @@ class Tensor throw std::runtime_error("Number of leading dimensions must match"); } - vector A_leading_shape(this->shapes_.begin(), this->shapes_.end() - 2); - vector B_leading_shape(other.shapes_.begin(), other.shapes_.end() - 2); + vector A_leading_shape(this->shape_.begin(), this->shape_.end() - 2); + vector B_leading_shape(other.shape_.begin(), other.shape_.end() - 2); if (A_leading_shape != B_leading_shape) { @@ -525,10 +525,10 @@ class Tensor } // Extract matrix dimensions - const size_t n = this->shapes_[A_ndim - 2]; - const size_t m = this->shapes_[A_ndim - 1]; - const size_t m_other = other.shapes_[B_ndim - 2]; - const size_t p = other.shapes_[B_ndim - 1]; + const size_t n = this->shape_[A_ndim - 2]; + const size_t m = this->shape_[A_ndim - 1]; + const size_t m_other = other.shape_[B_ndim - 2]; + const size_t p = other.shape_[B_ndim - 1]; if (m != m_other) { @@ -638,7 +638,7 @@ class Tensor // Create new tensor with swapped dimensions Tensor result = *this; - swap(result.shapes_[dim0], result.shapes_[dim1]); + swap(result.shape_[dim0], result.shape_[dim1]); swap(result.strides_[dim0], result.strides_[dim1]); // cout << "result.shapes_: " << result.shapes_[0] << " " << result.shapes_[1] << endl; @@ -684,13 +684,13 @@ class Tensor throw std::out_of_range("Permutation dimension out of range"); } - new_shapes[i] = this->shapes_[dim]; + new_shapes[i] = this->shape_[dim]; new_strides[i] = this->strides_[dim]; ++i; } Tensor result = *this; - result.shapes_ = new_shapes; + result.shape_ = new_shapes; result.strides_ = new_strides; return result; @@ -701,8 +701,8 @@ class Tensor /// @post The shape of the tensor is changed to 1D, with the same elements as the original tensor. 
void flatten() { - this->shapes_ = {this->size()}; - this->calculate_strides(); + this->shape_ = {this->size()}; + this->compute_contiguous_strides(); return; } @@ -772,12 +772,12 @@ class Tensor /// @return Tensor of integers where each element is 1 if the two tensors are equal at the same index, 0 otherwise Tensor equal(const Tensor &other) const { - if (other.shapes_ != this->shapes_) + if (other.shape_ != this->shape_) { throw runtime_error("Shape mismatch"); } - Tensor result(this->shapes_, static_cast(0)); + Tensor result(this->shape_, static_cast(0)); const vector &result_strides = result.strides_; for (size_t i = 0; i < this->size(); i++) @@ -795,7 +795,7 @@ class Tensor /// @return true if all elements are equal, false otherwise bool compare(const Tensor &other) const { - if (other.shapes_ != this->shapes_) + if (other.shape_ != this->shape_) { throw runtime_error("Shape mismatch"); } @@ -857,38 +857,55 @@ class Tensor /// @throws runtime_error if the new shape is not compatible with the current number of elements. void reshape(const vector &new_shape) { - size_t new_size = 1; - for (const size_t &dim : new_shape) + // Calculate total elements for both shapes + const int64_t current_elements = accumulate( + this->shape_.begin(), this->shape_.end(), 1, multiplies()); + const int64_t new_elements = accumulate( + new_shape.begin(), new_shape.end(), 1, multiplies()); + + if (current_elements != new_elements) { - new_size *= dim; + throw runtime_error("New shape must be compatible with the original shape"); } - if (new_size != this->size()) + vector original_strides(this->ndim(), 0); + int64_t stride = 1; + + for (int64_t i = this->ndim() - 1; i >= 0; --i) { - throw runtime_error("New shape must be compatible with the original shape"); + original_strides[i] = stride; + stride *= this->shape_[i]; + } + + if (original_strides != this->strides_) + { + cout << "Clone the tensor" << endl; + // Create a new tensor with contiguous data + Tensor result = this->clone(); + *this = result; } - this->shapes_ = new_shape; - this->calculate_strides(); + this->shape_ = new_shape; + this->compute_contiguous_strides(); return; } - /// @brief Return a deep copy of the tensor. Actually the same as the copy constructor. + /// @brief Return a deep copy of the tensor. The data is copied to a new contiguous storage (and this is the only difference from copy constructor). /// @details This function will create a new tensor with the same shape and data as the current tensor. 
/// @return a new tensor which is a deep copy of the current tensor Tensor clone() const { Tensor result; - result.shapes_ = this->shapes_; + result.shape_ = this->shape_; result.data_ = make_shared>(*(this->data_)); - result.calculate_strides(); + result.compute_contiguous_strides(); // Copy data from original tensor's view to the new contiguous storage for (size_t i = 0; i < this->size(); ++i) { - vector indices = linear_to_multi_idxs(i, result.shapes_); + vector indices = linear_to_multi_idxs(i, result.shape_); size_t src_offset = this->offset_; for (size_t dim = 0; dim < indices.size(); ++dim) @@ -936,7 +953,7 @@ class Tensor // Get the dimension of the tensor inline size_t ndim() const { - return this->shapes_.size(); + return this->shape_.size(); } const size_t size() const @@ -952,7 +969,7 @@ class Tensor } this->size_ = 1; - for (const size_t &s : this->shapes_) + for (const size_t &s : this->shape_) { this->size_ *= s; } @@ -969,7 +986,7 @@ class Tensor return; } - inline const vector &shapes() const { return this->shapes_; } + inline const vector &shapes() const { return this->shape_; } // ========================================operators overloading======================================== inline Tensor operator+(const Tensor &other) const { return this->add(other); } @@ -988,7 +1005,7 @@ class Tensor if (this == &other) return *this; - this->shapes_ = other.shapes_; + this->shape_ = other.shape_; this->data_ = make_shared>(*(other.data_)); this->strides_ = other.strides_; this->offset_ = other.offset_; @@ -1077,15 +1094,15 @@ class Tensor if (auto str_idx = get_if(&idx)) { Slice slice = Slice::parse(*str_idx); - expanded_indices.push_back(apply_slice(slice, this->shapes_[i])); + expanded_indices.push_back(apply_slice(slice, this->shape_[i])); } else if (auto int_idx = get_if(&idx)) { - expanded_indices.push_back({normalize_index(*int_idx, this->shapes_[i])}); + expanded_indices.push_back({normalize_index(*int_idx, this->shape_[i])}); } else if (auto slice_idx = get_if(&idx)) { - expanded_indices.push_back(apply_slice(*slice_idx, this->shapes_[i])); + expanded_indices.push_back(apply_slice(*slice_idx, this->shape_[i])); } else { diff --git a/include/utils/tensor_utils.hpp b/include/utils/tensor_utils.hpp index 345dfd8..94681e1 100644 --- a/include/utils/tensor_utils.hpp +++ b/include/utils/tensor_utils.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -93,7 +94,7 @@ Tensor dtype_impl(const Tensor &tensor) { Tensor result; - result.shapes_ = tensor.shapes_; + result.shape_ = tensor.shape_; result.data_ = make_shared>(); result.data_->resize(tensor.data_->size()); result.strides_ = tensor.strides_; From da3f7f6513d1557f9ca3297a5ff72b5149b2db1b Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Wed, 19 Feb 2025 00:15:05 +0800 Subject: [PATCH 31/36] feat: add flatten module --- include/modules/layers/flatten.hpp | 22 +++++++++++++++++++++ src/modules/layers/flatten.cpp | 31 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 include/modules/layers/flatten.hpp create mode 100644 src/modules/layers/flatten.cpp diff --git a/include/modules/layers/flatten.hpp b/include/modules/layers/flatten.hpp new file mode 100644 index 0000000..33d5250 --- /dev/null +++ b/include/modules/layers/flatten.hpp @@ -0,0 +1,22 @@ +#pragma once +#include "module.hpp" + +namespace nn +{ + + class Flatten : public Module + { + public: + Flatten(int64_t start_dim = 1, int64_t end_dim = -1); + + virtual Tensor<> forward(const Tensor<> &input) override; 
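+        // backward() is expected to reshape grad_output back to the input shape cached by forward()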
+ virtual Tensor<> backward(const Tensor<> &grad_output) override; + virtual void update_params(const float lr) override; + + private: + int64_t start_dim_; + int64_t end_dim_; + vector original_shape_; + }; + +} \ No newline at end of file diff --git a/src/modules/layers/flatten.cpp b/src/modules/layers/flatten.cpp new file mode 100644 index 0000000..94f53f3 --- /dev/null +++ b/src/modules/layers/flatten.cpp @@ -0,0 +1,31 @@ +#include "flatten.hpp" +using namespace nn; + +Flatten::Flatten(int64_t start_dim, int64_t end_dim) : start_dim_(start_dim), end_dim_(end_dim) +{ + cout << "Flatten layer initialized with start_dim = " << start_dim << " and end_dim = " << end_dim << endl; +} + +Tensor<> Flatten::forward(const Tensor<> &input) +{ + this->original_shape_ = input.shapes(); + + vector new_shape; + // return input.flatten(this->start_dim_, this->end_dim_); + + Tensor<> input_data = input; + input_data.reshape(new_shape); + + return input_data; +} + +Tensor<> Flatten::backward(const Tensor<> &grad_output) +{ + // return grad_output.reshape(this->original_shape_); + return Tensor<>(); +} + +void Flatten::update_params(const float lr) +{ + return; +} From 44e4c568bfbe9ea9f14f4386f986a5437293bc8e Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Thu, 20 Feb 2025 16:07:57 +0800 Subject: [PATCH 32/36] feat: finish the implementation of conv2d --- src/modules/layers/conv2d.cpp | 31 ++++++++++++++++++++++++++----- src/utils/conv2d_utils.cpp | 14 +++++++++----- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/modules/layers/conv2d.cpp b/src/modules/layers/conv2d.cpp index a633b4e..5e7a87d 100644 --- a/src/modules/layers/conv2d.cpp +++ b/src/modules/layers/conv2d.cpp @@ -41,10 +41,10 @@ Conv2d::Conv2d(size_t in_channels, this->padding_ = std::visit(process_variant, padding); this->dilation_ = std::visit(process_variant, dilation); - cout << "Kernel Size : " << this->kernel_size_.first << ", " << this->kernel_size_.second << endl; - cout << "Stride : " << this->stride_.first << ", " << this->stride_.second << endl; - cout << "Padding : " << this->padding_.first << ", " << this->padding_.second << endl; - cout << "Dilation : " << this->dilation_.first << ", " << this->dilation_.second << endl; + // cout << "Kernel Size : " << this->kernel_size_.first << ", " << this->kernel_size_.second << endl; + // cout << "Stride : " << this->stride_.first << ", " << this->stride_.second << endl; + // cout << "Padding : " << this->padding_.first << ", " << this->padding_.second << endl; + // cout << "Dilation : " << this->dilation_.first << ", " << this->dilation_.second << endl; // Check if padding mode is valid unordered_map all_padding_modes = {{"zeros", PaddingMode::ZEROS}, {"reflect", PaddingMode::REFLECT}, {"replicate", PaddingMode::REPLICATE}}; @@ -99,8 +99,16 @@ Tensor<> Conv2d::backward(const Tensor<> &grad_output) Tensor<> permuted_input = this->input_cache_.permute(1, 0, 2, 3); Tensor<> permuted_grad_output = grad_output.permute(1, 0, 2, 3); - this->grad_weight_ = convolution(this->dilation_, this->stride_, this->weight_.shapes(), permuted_input, permuted_grad_output, Tensor<>(), false); + // The grad weight shape is initially permuted + const vector permuted_grad_weight_shape = {this->in_channels_, this->out_channels_, this->kernel_size_.first, this->kernel_size_.second}; + this->grad_weight_ = convolution(this->dilation_, this->stride_, permuted_grad_weight_shape, permuted_input, permuted_grad_output, Tensor<>(), false); + + cout << "grad_weight: " << endl; + 
this->grad_weight_.print(); + cout << endl; + + // The grad weight shape is permuted back to the original shape this->grad_weight_ = this->grad_weight_.permute(1, 0, 2, 3); // dL_dB = sum(dL_dY, dims=(0, 2, 3)) @@ -120,12 +128,25 @@ Tensor<> Conv2d::backward(const Tensor<> &grad_output) } } } + + cout << "grad_bias: " << endl; + this->grad_bias_.print(); + cout << endl; } // dL_dX = fullconv(dL_dY, W) Tensor<> flipped_weight = flip_vertical_and_horizontal(this->weight_); + cout << "flipped_weight: " << endl; + flipped_weight.print(); + cout << endl; + + Tensor<> permuted_flipped_weight = flipped_weight.permute(1, 0, 2, 3); + cout << "permuted_flipped_weight: " << endl; + permuted_flipped_weight.print(); + cout << endl; + Tensor<> copy_grad_output = grad_output; if (this->stride_.first > 1 || this->stride_.second > 1) diff --git a/src/utils/conv2d_utils.cpp b/src/utils/conv2d_utils.cpp index 21069f3..457e971 100644 --- a/src/utils/conv2d_utils.cpp +++ b/src/utils/conv2d_utils.cpp @@ -189,13 +189,15 @@ Tensor<> flip_vertical_and_horizontal(const Tensor<> &input) throw std::invalid_argument("Input shape must be 4D"); } - Tensor<> output(input.shapes(), 0.0); + Tensor<> output = input; const size_t B = input.shapes()[0]; const size_t C = input.shapes()[1]; const size_t H = input.shapes()[2]; const size_t W = input.shapes()[3]; + double cache; + for (size_t b = 0; b < B; ++b) { for (size_t c = 0; c < C; ++c) @@ -204,16 +206,18 @@ Tensor<> flip_vertical_and_horizontal(const Tensor<> &input) { for (size_t w = 0; w < W; ++w) { - output[b, c, h, w] = input[b, c, H - h - 1, w]; - output[b, c, H - h - 1, w] = input[b, c, h, w]; + cache = output[b, c, h, w]; + output[b, c, h, w] = output[b, c, H - h - 1, w]; + output[b, c, H - h - 1, w] = cache; } } for (size_t h = 0; h < H; ++h) { for (size_t w = 0; w < W / 2; ++w) { - output[b, c, h, w] = input[b, c, h, W - w - 1]; - output[b, c, h, W - w - 1] = input[b, c, h, w]; + cache = output[b, c, h, w]; + output[b, c, h, w] = output[b, c, h, W - w - 1]; + output[b, c, h, W - w - 1] = cache; } } } From 91a71a856b3c8a2c8a204661c58afc2d605ef86e Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Thu, 20 Feb 2025 16:08:08 +0800 Subject: [PATCH 33/36] feat: finish the implementation of flatten --- include/modules/layers/flatten.hpp | 2 +- src/modules/layers/flatten.cpp | 14 ++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/include/modules/layers/flatten.hpp b/include/modules/layers/flatten.hpp index 33d5250..3a06f7a 100644 --- a/include/modules/layers/flatten.hpp +++ b/include/modules/layers/flatten.hpp @@ -16,7 +16,7 @@ namespace nn private: int64_t start_dim_; int64_t end_dim_; - vector original_shape_; + vector original_input_shape_; }; } \ No newline at end of file diff --git a/src/modules/layers/flatten.cpp b/src/modules/layers/flatten.cpp index 94f53f3..2c06798 100644 --- a/src/modules/layers/flatten.cpp +++ b/src/modules/layers/flatten.cpp @@ -8,24 +8,18 @@ Flatten::Flatten(int64_t start_dim, int64_t end_dim) : start_dim_(start_dim), en Tensor<> Flatten::forward(const Tensor<> &input) { - this->original_shape_ = input.shapes(); + this->original_input_shape_ = input.shapes(); - vector new_shape; - // return input.flatten(this->start_dim_, this->end_dim_); - - Tensor<> input_data = input; - input_data.reshape(new_shape); - - return input_data; + return input.flatten(this->start_dim_, this->end_dim_); } Tensor<> Flatten::backward(const Tensor<> &grad_output) { - // return grad_output.reshape(this->original_shape_); - return Tensor<>(); 
+ return grad_output.reshape(this->original_input_shape_); } void Flatten::update_params(const float lr) { + // we don't need to update any parameters return; } From b266293a49adab9cfd9457e824450c745b8d1fd1 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Thu, 20 Feb 2025 16:08:21 +0800 Subject: [PATCH 34/36] add playground of conv2d --- examples/test_conv2d.cpp | 132 +++++++++++++++++++++++++++++++++------ 1 file changed, 112 insertions(+), 20 deletions(-) diff --git a/examples/test_conv2d.cpp b/examples/test_conv2d.cpp index 3287a4e..f9bbd43 100644 --- a/examples/test_conv2d.cpp +++ b/examples/test_conv2d.cpp @@ -1,13 +1,17 @@ #include "conv2d.hpp" +#include "flatten.hpp" +#include "linear.hpp" +#include "cross_entropy.hpp" using namespace nn; int main() { - size_t batch_size = 1; + size_t batch_size = 2; + size_t input_data_size = 15; + size_t in_channels = 4; size_t out_channels = 8; size_t weight_size = 3; - size_t input_data_size = 15; size_t padding = 3; size_t stride = 2; size_t dilation = 2; @@ -16,16 +20,19 @@ int main() size_t out_channels_2 = 7; size_t weight_size_2 = 3; - size_t stride_2 = 2; size_t padding_2 = 3; + size_t stride_2 = 2; size_t dilation_2 = 4; string padding_mode_2 = "zeros"; bool use_bias_2 = true; + size_t in_features = out_channels_2 * input_data_size * input_data_size; + size_t out_features = 10; + Tensor<> test_weight = Tensor<>({out_channels, in_channels, weight_size, weight_size}, 0.0f); Tensor<> test_bias = Tensor<>({out_channels}, 0.0f); - size_t val = 1; + double val = 0.01; for (size_t i = 0; i < out_channels; i++) { for (size_t j = 0; j < in_channels; j++) @@ -35,23 +42,23 @@ int main() for (size_t l = 0; l < weight_size; l++) { test_weight[i, j, k, l] = val; - val++; + val += 0.01; } } } } - val = 1; + val = 0.01; for (size_t i = 0; i < out_channels; i++) { test_bias[i] = val; - val++; + val += 0.01; } Tensor<> test_weight_2 = Tensor<>({out_channels_2, out_channels, weight_size_2, weight_size_2}, 0.0f); Tensor<> test_bias_2 = Tensor<>({out_channels_2}, 0.0f); - double val_2 = 0.1; + val = 0.01; for (size_t i = 0; i < out_channels_2; i++) { for (size_t j = 0; j < out_channels; j++) @@ -60,23 +67,23 @@ int main() { for (size_t l = 0; l < weight_size_2; l++) { - test_weight_2[i, j, k, l] = val_2; - val_2 += 0.1; + test_weight_2[i, j, k, l] = val; + val += 0.01; } } } } - val_2 = 0.1; + val = 0.01; for (size_t i = 0; i < out_channels_2; i++) { - test_bias_2[i] = val_2; - val_2 += 0.1; + test_bias_2[i] = val; + val += 0.01; } Tensor<> test_input = Tensor<>({batch_size, in_channels, input_data_size, input_data_size}, 0.0f); - val = 1; + val = 0.01; for (size_t i = 0; i < batch_size; i++) { for (size_t j = 0; j < in_channels; j++) @@ -86,24 +93,36 @@ int main() for (size_t l = 0; l < input_data_size; l++) { test_input[i, j, k, l] = val; - val++; + val += 0.01; } } } } + + // cout << "Test input: " << endl; + // test_input.print(); + // cout << endl; + // cout << "Test weight: " << endl; // test_weight.print(); // cout << endl; - cout << "Test bias: " << endl; - test_bias.print(); - cout << endl; + // cout << "Test bias: " << endl; + // test_bias.print(); + // cout << endl; - Conv2d conv2d_1(in_channels, out_channels, weight_size, padding, stride, dilation, padding_mode, use_bias); + // cout << "Test weight 2: " << endl; + // test_weight_2.print(); + // cout << endl; - cout << "Conv2d layer 1 initialized with in_channels = " << in_channels << " and out_channels = " << out_channels << endl; + // cout << "Test bias 2: " << endl; + // test_bias_2.print(); + 
// cout << endl; + Conv2d conv2d_1(in_channels, out_channels, weight_size, padding, stride, dilation, padding_mode, use_bias); Conv2d conv2d_2(out_channels, out_channels_2, weight_size_2, padding_2, stride_2, dilation_2, padding_mode_2, use_bias_2); + Flatten flatten; + CrossEntropyLoss cross_entropy; conv2d_1.set_weight(test_weight); conv2d_1.set_bias(test_bias); @@ -140,5 +159,78 @@ int main() } cout << endl; + Tensor<> flattened_output_2 = flatten(output_2); + + cout << "Flattened output 2: " << endl; + flattened_output_2.print(); + cout << endl; + + Linear linear(flattened_output_2.shapes()[1], out_features, false); + + Tensor<> test_linear_weight({flattened_output_2.shapes()[1], out_features}, 0.0); + + val = 0.01; + for (size_t i = 0; i < flattened_output_2.shapes()[1]; ++i) + { + for (size_t j = 0; j < out_features; ++j) + { + test_linear_weight[i, j] = val; + val += 0.01; + } + } + + cout << "linear in features: " << flattened_output_2.shapes()[1] << endl; + cout << "linear out features: " << out_features << endl; + + linear.set_weight(test_linear_weight); + + cout << "Test linear weight: " << endl; + test_linear_weight.print(); + cout << endl; + + Tensor<> output_3 = linear(flattened_output_2); + + cout << "Output 3: " << endl; + output_3.print(); + cout << endl; + + Tensor<> labels({batch_size}, 0); + + val = 1; + for (size_t i = 0; i < batch_size; i++) + { + labels[i] = val; + val++; + } + + // output.print(); + // output_2.print(); + // output_3.print(); + + output_3 /= 1e6; + + double loss = cross_entropy(output_3, labels); + + cout << "Loss: " << loss << endl; + + Tensor<> dL_dV_2 = cross_entropy.backward(); + Tensor<> dL_dV_1 = linear.backward(dL_dV_2); + Tensor<> dL_dZ = flatten.backward(dL_dV_1); + + cout << "dL/dZ: " << endl; + dL_dZ.print(); + cout << endl; + + Tensor<> dL_dY = conv2d_2.backward(dL_dZ); + Tensor<> dL_dX = conv2d_1.backward(dL_dY); + + cout << "dL_dY: " << endl; + dL_dY.print(); + cout << endl; + + cout << "dL_dX: " << endl; + dL_dX.print(); + cout << endl; + return 0; } \ No newline at end of file From f03fa99a2cf8f9aa1774b9b713809ff7b701cfe4 Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Thu, 20 Feb 2025 16:08:30 +0800 Subject: [PATCH 35/36] add flatten module --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index f483866..cc64f52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ set(SOURCE_FILES src/core/module.cpp src/modules/layers/linear.cpp src/modules/layers/conv2d.cpp + src/modules/layers/flatten.cpp src/utils/conv2d_utils.cpp src/modules/layers/dropout.cpp src/modules/layers/conv2d.cpp From d905ded5d571cdeb42f8f5bbbb5e09d6d0fcecdb Mon Sep 17 00:00:00 2001 From: lucaswychan Date: Thu, 20 Feb 2025 16:13:52 +0800 Subject: [PATCH 36/36] fix: fix flatten and filter based on unit_test --- include/core/tensor.hpp | 130 +++++++++++++++++++++++++---- tests/core/tensor_test.cpp | 165 ++++++++++++++++++++++--------------- 2 files changed, 209 insertions(+), 86 deletions(-) diff --git a/include/core/tensor.hpp b/include/core/tensor.hpp index e567640..00f201f 100644 --- a/include/core/tensor.hpp +++ b/include/core/tensor.hpp @@ -481,6 +481,18 @@ class Tensor return result; } + // Divide all elements of tensor with the given scaler + Tensor div(const T &scaler) const + { + Tensor result = *this; + + for (size_t i = 0; i < this->size(); i++) + { + (*result.data_)[i] /= scaler; + } + return result; + } + /** * Matrix multiplication of two tensors. 
* @@ -611,9 +623,8 @@ class Tensor if (ndim == 1 && dim0 == -2 && dim1 == -1) { - Tensor result = *this; - result.reshape({this->size(), 1}); - return result; + Tensor result = *this; + return result.reshape({this->size(), 1}); } if (dim0 == dim1) @@ -696,14 +707,60 @@ class Tensor return result; } - /// @brief Flatten the tensor into 1D in-place. - /// @details This function only changes the shape of the tensor, and does not modify the underlying data. - /// @post The shape of the tensor is changed to 1D, with the same elements as the original tensor. - void flatten() + /** + * Flattens the dimensions of the tensor from start_dim to end_dim into a single dimension. + * + * This function collapses multiple dimensions of the tensor into one, effectively reducing + * the number of dimensions by merging the specified range of dimensions. If start_dim or + * end_dim is negative, it will be counted from the last dimension backwards. The resulting + * tensor will have the same total number of elements as the original tensor. + * + * @param start_dim The starting dimension index to begin flattening. Defaults to 0. + * @param end_dim The ending dimension index to stop flattening. Defaults to -1, which + * refers to the last dimension. + * @return A new tensor with the specified dimensions flattened. + * + * @throws std::invalid_argument if start_dim is greater than end_dim. + * @throws std::out_of_range if start_dim or end_dim is out of the range of the tensor's dimensions. + */ + + Tensor<> flatten(int64_t start_dim = 0, int64_t end_dim = -1) const { - this->shape_ = {this->size()}; - this->compute_contiguous_strides(); - return; + if (start_dim < 0) + { + start_dim += this->ndim(); + } + if (end_dim < 0) + { + end_dim += this->ndim(); + } + + if (start_dim > end_dim) + { + throw invalid_argument("Start dimension must be less than or equal to end dimension"); + } + + if (start_dim < 0 || start_dim >= this->ndim() || end_dim < 0 || end_dim >= this->ndim()) + { + throw out_of_range("Flatten dimensions out of range"); + } + + vector new_shape; + new_shape.reserve(this->ndim() - (end_dim - start_dim + 1) + 1); + + for (size_t i = 0; i < this->ndim(); ++i) + { + if (i <= start_dim || i > end_dim) + { + new_shape.push_back(this->shape_[i]); + } + else + { + new_shape[new_shape.size() - 1] *= this->shape_[i]; + } + } + + return this->reshape(new_shape); } /// @brief Calculate the absolute value of each element in the tensor @@ -729,9 +786,9 @@ class Tensor for (size_t i = 0; i < this->size(); i++) { - if (func((*this->data_)[i])) + if (!func((*this->data_)[i])) { - (*result.data_)[i] = (*this->data_)[i]; + (*result.data_)[i] = static_cast(0); } } @@ -855,7 +912,7 @@ class Tensor /// The total number of elements must remain the same; otherwise, an exception is thrown. /// @param new_shape The desired shape for the tensor. /// @throws runtime_error if the new shape is not compatible with the current number of elements. 
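    /// Example (a minimal usage sketch, assuming a Tensor<> of doubles built with the
    /// shape/value constructor above; shapes are illustrative only):
    ///     Tensor<> t({2, 3, 4}, 1.0);      // 24 elements, shape (2, 3, 4)
    ///     Tensor<> r = t.reshape({4, 6});  // returns a new (4, 6) tensor; t keeps its shape
    ///     Tensor<> f = t.flatten(1, -1);   // flatten() builds on reshape(): dims 1..2 collapse -> (2, 12)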
-    void reshape(const vector &new_shape)
+    Tensor<> reshape(const vector &new_shape) const
     {
         // Calculate total elements for both shapes
         const int64_t current_elements = accumulate(
@@ -868,6 +925,7 @@ class Tensor
             throw runtime_error("New shape must be compatible with the original shape");
         }

+        // Check if the data is stored in a contiguous way
         vector original_strides(this->ndim(), 0);
         int64_t stride = 1;

@@ -877,18 +935,40 @@ class Tensor
             stride *= this->shape_[i];
         }

+        Tensor result;
+
+        // If the data is not stored contiguously, the strides will not be a cumulative product of the shape
         if (original_strides != this->strides_)
         {
             cout << "Clone the tensor" << endl;
+            /*
+            This part is a little bit complicated.
+
+            Since the data may not be stored contiguously, directly changing the shape is a problem:
+            we would lose track of the tensor's strides.
+
+            Therefore we first have to make the data contiguous, and only then change the shape.
+            clone() creates a new tensor with the same shape and data, but with contiguous storage.
+
+            The copy constructor is not enough here, because it does not rearrange the data,
+            so the copy could still be non-contiguous.
+
+            Eventually the data is guaranteed to be contiguous, so we can safely change the shape.
+            */
+
             // Create a new tensor with contiguous data
-            Tensor result = this->clone();
-            *this = result;
+            result = this->clone();
+        }
+        else
+        {
+            // The data is already contiguous, so a shallow copy is enough
+            result = *this;
         }

-        this->shape_ = new_shape;
-        this->compute_contiguous_strides();
+        result.shape_ = new_shape;
+        result.compute_contiguous_strides();

-        return;
+        return result;
     }

     /// @brief Return a deep copy of the tensor. The data is copied to a new contiguous storage (and this is the only difference from copy constructor).
@@ -993,6 +1073,8 @@ class Tensor
     inline Tensor operator-(const Tensor &other) const { return this->sub(other); }
     inline Tensor operator*(const Tensor &other) const { return this->mul(other); }
     inline Tensor operator*(const T &scaler) const { return this->mul(scaler); }
+    inline Tensor operator/(const Tensor &other) const { return this->div(other); }
+    inline Tensor operator/(const T &scaler) const { return this->div(scaler); }
     inline bool operator==(const Tensor &other) const { return this->compare(other); }

 /*
@@ -1038,6 +1120,18 @@ class Tensor
         return *this;
     }

+    const Tensor operator/=(const Tensor &other)
+    {
+        *this = *this / other;
+        return *this;
+    }
+
+    const Tensor operator/=(const T &other)
+    {
+        *this = *this / other;
+        return *this;
+    }
+
     // lvalue operator overloading
     template T &operator[](Indices...
indices) diff --git a/tests/core/tensor_test.cpp b/tests/core/tensor_test.cpp index 955ecb0..9fb0c31 100644 --- a/tests/core/tensor_test.cpp +++ b/tests/core/tensor_test.cpp @@ -3,12 +3,14 @@ #include "tensor.hpp" #include "math.h" -TEST_CASE("TensorTest - Constructor and Destructor") { +TEST_CASE("TensorTest - Constructor and Destructor") +{ Tensor<> tensor; // No explicit assertions needed, just verify no crashes } -TEST_CASE("TensorTest - Scaler Constructor") { +TEST_CASE("TensorTest - Scaler Constructor") +{ Tensor<> tensor(10.0f); CHECK(tensor.ndim() == 1); CHECK(tensor.size() == 1); @@ -16,7 +18,8 @@ TEST_CASE("TensorTest - Scaler Constructor") { CHECK(tensor[0] == 10); } -TEST_CASE("TensorTest - 1D Tensor Constructor from initializer_list") { +TEST_CASE("TensorTest - 1D Tensor Constructor from initializer_list") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; CHECK(tensor_1d.ndim() == 1); CHECK(tensor_1d.size() == 4); @@ -33,7 +36,8 @@ TEST_CASE("TensorTest - 1D Tensor Constructor from initializer_list") { CHECK(tensor_1d_1val[0] == 0.0f); } -TEST_CASE("TensorTest - 2D Tensor Constructor from initializer_list") { +TEST_CASE("TensorTest - 2D Tensor Constructor from initializer_list") +{ Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; CHECK(tensor_2d.ndim() == 2); CHECK(tensor_2d.size() == 4); @@ -43,7 +47,7 @@ TEST_CASE("TensorTest - 2D Tensor Constructor from initializer_list") { CHECK(tensor_2d[0, 1] == 2.0f); CHECK(tensor_2d[1, 0] == 3.0f); CHECK(tensor_2d[1, 1] == 4.0f); - + Tensor<> tensor_2d_1row = {{0.0f, 0.0f}}; CHECK(tensor_2d_1row.ndim() == 2); CHECK(tensor_2d_1row.size() == 2); @@ -61,7 +65,8 @@ TEST_CASE("TensorTest - 2D Tensor Constructor from initializer_list") { // CHECK(tensor_2d_1col[1, 0] == 0.0f); } -TEST_CASE("TensorTest - 3D Tensor Constructor from initializer_list") { +TEST_CASE("TensorTest - 3D Tensor Constructor from initializer_list") +{ Tensor<> tensor = {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}; CHECK(tensor.ndim() == 3); CHECK(tensor.size() == 8); @@ -87,7 +92,8 @@ TEST_CASE("TensorTest - 3D Tensor Constructor from initializer_list") { CHECK(tensor2[1, 1, 1] == 0.0f); } -TEST_CASE("TensorTest - 1D Tensor Constructor from vector") { +TEST_CASE("TensorTest - 1D Tensor Constructor from vector") +{ vector data = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> tensor1 = data; CHECK(tensor1.ndim() == 1); @@ -106,7 +112,8 @@ TEST_CASE("TensorTest - 1D Tensor Constructor from vector") { CHECK(tensor2[0] == 0.0f); } -TEST_CASE("TensorTest - 2D Tensor Constructor from vector") { +TEST_CASE("TensorTest - 2D Tensor Constructor from vector") +{ vector> data = {{1.0f, 2.0f}, {3.0f, 4.0f}}; Tensor<> tensor = data; CHECK(tensor.ndim() == 2); @@ -117,7 +124,7 @@ TEST_CASE("TensorTest - 2D Tensor Constructor from vector") { CHECK(tensor[0, 1] == 2.0f); CHECK(tensor[1, 0] == 3.0f); CHECK(tensor[1, 1] == 4.0f); - + vector> data2 = {{0.0f, 0.0f}}; Tensor<> tensor2 = data2; CHECK(tensor2.ndim() == 2); @@ -128,7 +135,8 @@ TEST_CASE("TensorTest - 2D Tensor Constructor from vector") { CHECK(tensor2[0, 1] == 0.0f); } -TEST_CASE("TensorTest - 3D Tensor Constructor from vector") { +TEST_CASE("TensorTest - 3D Tensor Constructor from vector") +{ vector>> data = {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}; Tensor<> tensor = data; CHECK(tensor.ndim() == 3); @@ -156,7 +164,8 @@ TEST_CASE("TensorTest - 3D Tensor Constructor from vector") { CHECK(tensor2[1, 1, 1] == 0.0f); } -TEST_CASE("TensorTest - 4D Tensor Constructor from vector") { +TEST_CASE("TensorTest - 
4D Tensor Constructor from vector") +{ vector>>> data = {{{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}, {{{9.0f, 10.0f}, {11.0f, 12.0f}}, {{13.0f, 14.0f}, {15.0f, 16.0f}}}}; Tensor<> tensor = data; CHECK(tensor.ndim() == 4); @@ -172,7 +181,8 @@ TEST_CASE("TensorTest - 4D Tensor Constructor from vector") { CHECK(tensor[1, 1, 1, 1] == 16.0f); } -TEST_CASE("TensorTest - Copy Constructor") { +TEST_CASE("TensorTest - Copy Constructor") +{ // 1D tensor Tensor<> tensor1 = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> test_tensor = tensor1; @@ -219,7 +229,8 @@ TEST_CASE("TensorTest - Copy Constructor") { CHECK(test_tensor[1, 1, 1] == 8.0f); } -TEST_CASE("TensorTest - Move Constructor") { +TEST_CASE("TensorTest - Move Constructor") +{ // 1D tensor Tensor<> tensor1 = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> test_tensor = std::move(tensor1); @@ -266,7 +277,8 @@ TEST_CASE("TensorTest - Move Constructor") { CHECK(test_tensor[1, 1, 1] == 8.0f); } -TEST_CASE("TensorTest - Certain Value Constructor") { +TEST_CASE("TensorTest - Certain Value Constructor") +{ Tensor<> tensor_1d({1}, 0.0f); CHECK(tensor_1d.ndim() == 1); CHECK(tensor_1d.size() == 1); @@ -283,7 +295,6 @@ TEST_CASE("TensorTest - Certain Value Constructor") { CHECK(tensor_2d[1, 0] == 10.0f); CHECK(tensor_2d[1, 1] == 10.0f); - Tensor<> tensor_3d({2, 2, 2}, 5.0f); CHECK(tensor_3d.ndim() == 3); CHECK(tensor_3d.size() == 8); @@ -297,7 +308,8 @@ TEST_CASE("TensorTest - Certain Value Constructor") { CHECK(tensor_3d[1, 1, 1] == 5.0f); } -TEST_CASE("TensorTest - Indexing Operator") { +TEST_CASE("TensorTest - Indexing Operator") +{ Tensor<> tensor = {1.0f, 2.0f, 3.0f, 4.0f}; CHECK(tensor[0] == 1.0f); CHECK(tensor[1] == 2.0f); @@ -318,7 +330,8 @@ TEST_CASE("TensorTest - Indexing Operator") { CHECK(tensor[1, 1, 1] == 8.0f); } -TEST_CASE("TensorTest - Indexing Operator - Out of Bound") { +TEST_CASE("TensorTest - Indexing Operator - Out of Bound") +{ Tensor<> tensor = {1.0f, 2.0f, 3.0f, 4.0f}; CHECK_THROWS(tensor[4]); @@ -332,7 +345,8 @@ TEST_CASE("TensorTest - Indexing Operator - Out of Bound") { CHECK_THROWS(tensor[0, 0, 2]); } -TEST_CASE("TensorTest - Indexing Operator - Negative Indexing") { +TEST_CASE("TensorTest - Indexing Operator - Negative Indexing") +{ Tensor<> tensor = {1.0f, 2.0f, 3.0f, 4.0f}; CHECK(tensor[-1] == 4.0f); CHECK(tensor[-2] == 3.0f); @@ -350,7 +364,8 @@ TEST_CASE("TensorTest - Indexing Operator - Negative Indexing") { CHECK(tensor[0, -1, 0] == 3.0f); } -TEST_CASE("TensorTest - Indexing Operator - Normal Slicing") { +TEST_CASE("TensorTest - Indexing Operator - Normal Slicing") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> sliced_tensor_1d_1 = tensor_1d.index({":2"}); CHECK(sliced_tensor_1d_1.ndim() == 1); @@ -391,7 +406,8 @@ TEST_CASE("TensorTest - Indexing Operator - Normal Slicing") { CHECK(sliced_tensor_2d_1[1, 1] == 4.0f); } -TEST_CASE("TensorTest - Transpose") { +TEST_CASE("TensorTest - Transpose") +{ Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; Tensor<> transposed_tensor_2d = tensor_2d.transpose(); CHECK(transposed_tensor_2d.ndim() == 2); @@ -415,50 +431,53 @@ TEST_CASE("TensorTest - Transpose") { CHECK(transposed_tensor_1d[-1, -1] == 4.0f); } -TEST_CASE("TensorTest - flatten") { +TEST_CASE("TensorTest - flatten") +{ Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; - tensor_2d.flatten(); - CHECK(tensor_2d.ndim() == 1); - CHECK(tensor_2d.size() == 4); - CHECK(tensor_2d.shapes()[0] == 4); - CHECK(tensor_2d[0] == 1.0f); - CHECK(tensor_2d[1] == 2.0f); - CHECK(tensor_2d[2] == 3.0f); - CHECK(tensor_2d[3] == 4.0f); + 
Tensor<> flattened_tensor_2d = tensor_2d.flatten(); + CHECK(flattened_tensor_2d.ndim() == 1); + CHECK(flattened_tensor_2d.size() == 4); + CHECK(flattened_tensor_2d.shapes()[0] == 4); + CHECK(flattened_tensor_2d[0] == 1.0f); + CHECK(flattened_tensor_2d[1] == 2.0f); + CHECK(flattened_tensor_2d[2] == 3.0f); + CHECK(flattened_tensor_2d[3] == 4.0f); } -TEST_CASE("TensorTest - reshape") { +TEST_CASE("TensorTest - reshape") +{ Tensor<> tensor_2d = {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}}; - tensor_2d.reshape({2, 3, 2}); - CHECK(tensor_2d.ndim() == 3); - CHECK(tensor_2d.size() == 12); - CHECK(tensor_2d.shapes()[0] == 2); - CHECK(tensor_2d.shapes()[1] == 3); - CHECK(tensor_2d.shapes()[2] == 2); - CHECK(tensor_2d[0, 0, 0] == 1.0f); - CHECK(tensor_2d[0, 0, 1] == 2.0f); - CHECK(tensor_2d[0, 1, 0] == 3.0f); - CHECK(tensor_2d[0, 1, 1] == 4.0f); - CHECK(tensor_2d[0, 2, 0] == 5.0f); - CHECK(tensor_2d[0, 2, 1] == 6.0f); - CHECK(tensor_2d[1, 0, 0] == 7.0f); - CHECK(tensor_2d[-1, -1, -1] == 12.0f); + Tensor<> reshaped_tensor_2d = tensor_2d.reshape({2, 3, 2}); + CHECK(reshaped_tensor_2d.ndim() == 3); + CHECK(reshaped_tensor_2d.size() == 12); + CHECK(reshaped_tensor_2d.shapes()[0] == 2); + CHECK(reshaped_tensor_2d.shapes()[1] == 3); + CHECK(reshaped_tensor_2d.shapes()[2] == 2); + CHECK(reshaped_tensor_2d[0, 0, 0] == 1.0f); + CHECK(reshaped_tensor_2d[0, 0, 1] == 2.0f); + CHECK(reshaped_tensor_2d[0, 1, 0] == 3.0f); + CHECK(reshaped_tensor_2d[0, 1, 1] == 4.0f); + CHECK(reshaped_tensor_2d[0, 2, 0] == 5.0f); + CHECK(reshaped_tensor_2d[0, 2, 1] == 6.0f); + CHECK(reshaped_tensor_2d[1, 0, 0] == 7.0f); + CHECK(reshaped_tensor_2d[-1, -1, -1] == 12.0f); Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - tensor_1d.reshape({2, 3}); - CHECK(tensor_1d.ndim() == 2); - CHECK(tensor_1d.size() == 6); - CHECK(tensor_1d.shapes()[0] == 2); - CHECK(tensor_1d.shapes()[1] == 3); - CHECK(tensor_1d[0, 0] == 1.0f); - CHECK(tensor_1d[0, 1] == 2.0f); - CHECK(tensor_1d[0, 2] == 3.0f); - CHECK(tensor_1d[1, 0] == 4.0f); - CHECK(tensor_1d[1, 1] == 5.0f); - CHECK(tensor_1d[1, 2] == 6.0f); + Tensor<> reshaped_tensor_1d = tensor_1d.reshape({2, 3}); + CHECK(reshaped_tensor_1d.ndim() == 2); + CHECK(reshaped_tensor_1d.size() == 6); + CHECK(reshaped_tensor_1d.shapes()[0] == 2); + CHECK(reshaped_tensor_1d.shapes()[1] == 3); + CHECK(reshaped_tensor_1d[0, 0] == 1.0f); + CHECK(reshaped_tensor_1d[0, 1] == 2.0f); + CHECK(reshaped_tensor_1d[0, 2] == 3.0f); + CHECK(reshaped_tensor_1d[1, 0] == 4.0f); + CHECK(reshaped_tensor_1d[1, 1] == 5.0f); + CHECK(reshaped_tensor_1d[1, 2] == 6.0f); } -TEST_CASE("TensorTest - abs") { +TEST_CASE("TensorTest - abs") +{ Tensor<> tensor_2d = {{-1.0f, -2.0f}, {3.0f, 4.0f}}; Tensor<> abs_tensor_2d = tensor_2d.abs(); CHECK(abs_tensor_2d.ndim() == 2); @@ -471,7 +490,8 @@ TEST_CASE("TensorTest - abs") { CHECK(abs_tensor_2d[1, 1] == 4.0f); } -TEST_CASE("TensorTest - sum") { +TEST_CASE("TensorTest - sum") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; double sum_1d = tensor_1d.sum(); CHECK(sum_1d == 10.0f); @@ -485,9 +505,11 @@ TEST_CASE("TensorTest - sum") { CHECK(sum_3d == 36.0f); } -TEST_CASE("TensorTest - filter") { +TEST_CASE("TensorTest - filter") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; - Tensor<> filtered_tensor_1d = tensor_1d.filter([](double x) { return x < 3.0f; }); + Tensor<> filtered_tensor_1d = tensor_1d.filter([](double x) + { return x < 3.0f; }); CHECK(filtered_tensor_1d.ndim() == 1); CHECK(filtered_tensor_1d.size() == 4); CHECK(filtered_tensor_1d.shapes()[0] 
== 4); @@ -497,7 +519,8 @@ TEST_CASE("TensorTest - filter") { CHECK(filtered_tensor_1d[3] == 0.0f); Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; - Tensor<> filtered_tensor_2d = tensor_2d.filter([](double x) { return x < 3.0f; }); + Tensor<> filtered_tensor_2d = tensor_2d.filter([](double x) + { return x < 3.0f; }); CHECK(filtered_tensor_2d.ndim() == 2); CHECK(filtered_tensor_2d.size() == 4); CHECK(filtered_tensor_2d.shapes()[0] == 2); @@ -508,7 +531,8 @@ TEST_CASE("TensorTest - filter") { CHECK(filtered_tensor_2d[1, 1] == 0.0f); Tensor<> tensor_3d = {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}; - Tensor<> filtered_tensor_3d = tensor_3d.filter([](double x) { return x < 3.0f; }); + Tensor<> filtered_tensor_3d = tensor_3d.filter([](double x) + { return x < 3.0f; }); CHECK(filtered_tensor_3d.ndim() == 3); CHECK(filtered_tensor_3d.size() == 8); CHECK(filtered_tensor_3d.shapes()[0] == 2); @@ -524,11 +548,13 @@ TEST_CASE("TensorTest - filter") { CHECK(filtered_tensor_3d[1, 1, 1] == 0.0f); } -TEST_CASE("TensorTest - map") { +TEST_CASE("TensorTest - map") +{ double eps = 1e-5f; Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; - Tensor<> tensor_1d_exp = tensor_1d.map([](double x) { return exp(x); }); + Tensor<> tensor_1d_exp = tensor_1d.map([](double x) + { return exp(x); }); CHECK(tensor_1d_exp.ndim() == 1); CHECK(tensor_1d_exp.size() == 4); CHECK(tensor_1d_exp.shapes()[0] == 4); @@ -538,7 +564,8 @@ TEST_CASE("TensorTest - map") { CHECK(tensor_1d_exp[3] - exp(4.0f) < eps); Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; - Tensor<> tensor_2d_times_10 = tensor_2d.map([](double x) { return x * 10.0f; }); + Tensor<> tensor_2d_times_10 = tensor_2d.map([](double x) + { return x * 10.0f; }); CHECK(tensor_2d_times_10.ndim() == 2); CHECK(tensor_2d_times_10.size() == 4); CHECK(tensor_2d_times_10.shapes()[0] == 2); @@ -549,7 +576,8 @@ TEST_CASE("TensorTest - map") { CHECK(tensor_2d_times_10[1, 1] == 40.0f); Tensor<> tensor_3d = {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}; - Tensor<> tensor_3d_log = tensor_3d.map([](double x) { return log(x); }); + Tensor<> tensor_3d_log = tensor_3d.map([](double x) + { return log(x); }); CHECK(tensor_3d_log.ndim() == 3); CHECK(tensor_3d_log.size() == 8); CHECK(tensor_3d_log.shapes()[0] == 2); @@ -565,7 +593,8 @@ TEST_CASE("TensorTest - map") { CHECK(tensor_3d_log[1, 1, 1] - log(8.0f) < eps); } -TEST_CASE("TensorTest - equal") { +TEST_CASE("TensorTest - equal") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> another_tensor_1d = {1.0f, 2.0f, 5.0f, 4.0f}; Tensor equal_tensor_1d = tensor_1d.equal(another_tensor_1d); @@ -607,7 +636,8 @@ TEST_CASE("TensorTest - equal") { CHECK(equal_tensor_3d[1, 1, 1] == 1); } -TEST_CASE("TensorTest - Matrix Multiplication") { +TEST_CASE("TensorTest - Matrix Multiplication") +{ Tensor<> tensor_2d_1 = {{1.0f, 2.0f}, {3.0f, 4.0f}}; Tensor<> transposed_tensor_2d_1 = tensor_2d_1.transpose(); Tensor<> matrix_multiplication_2d_1 = tensor_2d_1.matmul(transposed_tensor_2d_1); @@ -620,5 +650,4 @@ TEST_CASE("TensorTest - Matrix Multiplication") { CHECK(matrix_multiplication_2d_1[0, 1] == 11.0f); CHECK(matrix_multiplication_2d_1[1, 0] == 11.0f); CHECK(matrix_multiplication_2d_1[1, 1] == 25.0f); - } \ No newline at end of file
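
The Flatten layer added in this series caches the input shape on the forward pass and, on the backward pass, reshapes the incoming gradient back to that shape. Below is a minimal sketch of that round trip; it assumes the Flatten(start_dim, end_dim) constructor from src/modules/layers/flatten.cpp and uses only calls already exercised in examples/test_conv2d.cpp, with illustrative shapes and fill values.

#include "flatten.hpp"

using namespace nn;

int main()
{
    // A (2, 3, 4, 4) activation filled with a constant value (illustrative only)
    Tensor<> input({2, 3, 4, 4}, 0.5);

    // Flatten everything after the batch dimension: (2, 3, 4, 4) -> (2, 48)
    Flatten flatten(1, -1);
    Tensor<> flat = flatten(input);

    // Backward reshapes the incoming gradient back to the cached input shape
    Tensor<> grad_out({2, 48}, 1.0);
    Tensor<> grad_in = flatten.backward(grad_out);

    cout << "forward ndim: " << flat.ndim() << ", backward ndim: " << grad_in.ndim() << endl;

    return 0;
}

The playground in examples/test_conv2d.cpp drives the same pattern with the Conv2d outputs instead of a hand-built tensor.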