diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c09c2f..cc64f52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,16 +31,19 @@ include_directories( # Add source files set(SOURCE_FILES - src/core/module.cpp src/core/tensor.cpp + src/utils/tensor_utils.cpp + src/core/module.cpp src/modules/layers/linear.cpp + src/modules/layers/conv2d.cpp + src/modules/layers/flatten.cpp + src/utils/conv2d_utils.cpp src/modules/layers/dropout.cpp src/modules/layers/conv2d.cpp src/modules/losses/mse.cpp src/modules/activations/relu.cpp src/modules/activations/softmax.cpp src/modules/losses/cross_entropy.cpp - src/utils/tensor_utils.cpp src/datasets/mnist.cpp src/models/mlp.cpp src/metrics/accuracy.cpp @@ -51,7 +54,7 @@ set(SOURCE_FILES add_library(neuralnet ${SOURCE_FILES}) # Add the executable for the main example -add_executable(main examples/test_tensor.cpp) +add_executable(main examples/test_conv2d.cpp) target_link_libraries(main neuralnet) # Only build tests if BUILD_TESTS is ON diff --git a/debug.sh b/debug.sh new file mode 100755 index 0000000..2cf2f4d --- /dev/null +++ b/debug.sh @@ -0,0 +1,4 @@ +cd build/ +cmake -DCMAKE_BUILD_TYPE=Debug .. +make +lldb main \ No newline at end of file diff --git a/examples/main.cpp b/examples/main.cpp index 1c62a61..57a301f 100644 --- a/examples/main.cpp +++ b/examples/main.cpp @@ -6,7 +6,8 @@ #include "utils.hpp" using namespace nn; -int main() { +int main() +{ // Define the hyperparameters @@ -21,7 +22,8 @@ int main() { const string mnist_label_file = "../data/mnist/train-labels.idx1-ubyte"; // load MNIST data - if (!dataset.load_data(mnist_image_file, mnist_label_file)) { + if (!dataset.load_data(mnist_image_file, mnist_label_file)) + { cerr << "Failed to load dataset" << endl; return 1; } @@ -29,23 +31,31 @@ int main() { // Initialize the model MLP model = MLP({784, 128, 64, 10}, DROPOUT_P); + cout << "Finished model initialization" << endl; + // Define the loss function CrossEntropyLoss criterion = CrossEntropyLoss(); + cout << "Finished loss initialization" << endl; + double loss = 0.0; double acc = 0.0; vector loss_list; vector accuracy_list; + cout << "Training started..." 
<< endl; + // // Train the model // Example of iterating through all batches - for (size_t e = 0; e < EPOCH; e++) { + for (size_t e = 0; e < EPOCH; e++) + { cout << "\nEpoch " << e + 1 << ":\n"; - dataset.reset(); // Reset batch counter at the start of each epoch + dataset.reset(); // Reset batch counter at the start of each epoch loss_list.clear(); accuracy_list.clear(); - - for (size_t i = 0; i < dataset.get_num_batches(); i++) { + + for (size_t i = 0; i < dataset.get_num_batches(); i++) + { auto batch = dataset.get_next_batch(); auto [data, labels] = batch.to_tensor(); @@ -53,9 +63,7 @@ int main() { Tensor<> output = model(data); loss = criterion(output, labels); - // cout << "After loss" << endl; acc = metrics::accuracy(output, labels); - // cout << "After acc" << endl; accuracy_list.push_back(acc); loss_list.push_back(loss); @@ -66,7 +74,7 @@ int main() { model.update_params(LR); // print the training stats - print_training_stats_line(i, loss, acc); + print_stats_line(i, loss, acc); } double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size(); @@ -78,5 +86,51 @@ int main() { cout << "------------------------------------" << endl; } + // Inference + + model.eval(); + + const string mnist_image_file_test = "../data/mnist/t10k-images.idx3-ubyte"; + const string mnist_label_file_test = "../data/mnist/t10k-labels.idx1-ubyte"; + + MNIST test_dataset(BATCH_SIZE); + + if (!test_dataset.load_data(mnist_image_file_test, mnist_label_file_test)) + { + cerr << "Failed to load test dataset" << endl; + return 1; + } + + cout << "\n------------------------------------" << endl; + cout << "Testing started..." << endl; + + loss = 0.0; + acc = 0.0; + loss_list.clear(); + accuracy_list.clear(); + + for (size_t i = 0; i < test_dataset.get_num_batches(); i++) + { + auto batch = test_dataset.get_next_batch(); + auto [data, labels] = batch.to_tensor(); + + // forward propagation + Tensor<> output = model(data); + + loss = criterion(output, labels); + acc = metrics::accuracy(output, labels); + + accuracy_list.push_back(acc); + loss_list.push_back(loss); + + // print the testing stats + print_stats_line(i, loss, acc); + } + + double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size(); + double total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100; + + cout << "------------------------------------" << endl; + return 0; } diff --git a/examples/test_conv2d.cpp b/examples/test_conv2d.cpp new file mode 100644 index 0000000..f9bbd43 --- /dev/null +++ b/examples/test_conv2d.cpp @@ -0,0 +1,236 @@ +#include "conv2d.hpp" +#include "flatten.hpp" +#include "linear.hpp" +#include "cross_entropy.hpp" +using namespace nn; + +int main() +{ + size_t batch_size = 2; + size_t input_data_size = 15; + + size_t in_channels = 4; + size_t out_channels = 8; + size_t weight_size = 3; + size_t padding = 3; + size_t stride = 2; + size_t dilation = 2; + string padding_mode = "zeros"; + bool use_bias = true; + + size_t out_channels_2 = 7; + size_t weight_size_2 = 3; + size_t padding_2 = 3; + size_t stride_2 = 2; + size_t dilation_2 = 4; + string padding_mode_2 = "zeros"; + bool use_bias_2 = true; + + size_t in_features = out_channels_2 * input_data_size * input_data_size; + size_t out_features = 10; + + Tensor<> test_weight = Tensor<>({out_channels, in_channels, weight_size, weight_size}, 0.0f); + Tensor<> test_bias = Tensor<>({out_channels}, 0.0f); + + double val = 0.01; + for (size_t i = 0; i < out_channels; i++) + { + for 
(size_t j = 0; j < in_channels; j++) + { + for (size_t k = 0; k < weight_size; k++) + { + for (size_t l = 0; l < weight_size; l++) + { + test_weight[i, j, k, l] = val; + val += 0.01; + } + } + } + } + + val = 0.01; + for (size_t i = 0; i < out_channels; i++) + { + test_bias[i] = val; + val += 0.01; + } + + Tensor<> test_weight_2 = Tensor<>({out_channels_2, out_channels, weight_size_2, weight_size_2}, 0.0f); + Tensor<> test_bias_2 = Tensor<>({out_channels_2}, 0.0f); + + val = 0.01; + for (size_t i = 0; i < out_channels_2; i++) + { + for (size_t j = 0; j < out_channels; j++) + { + for (size_t k = 0; k < weight_size_2; k++) + { + for (size_t l = 0; l < weight_size_2; l++) + { + test_weight_2[i, j, k, l] = val; + val += 0.01; + } + } + } + } + + val = 0.01; + for (size_t i = 0; i < out_channels_2; i++) + { + test_bias_2[i] = val; + val += 0.01; + } + + Tensor<> test_input = Tensor<>({batch_size, in_channels, input_data_size, input_data_size}, 0.0f); + + val = 0.01; + for (size_t i = 0; i < batch_size; i++) + { + for (size_t j = 0; j < in_channels; j++) + { + for (size_t k = 0; k < input_data_size; k++) + { + for (size_t l = 0; l < input_data_size; l++) + { + test_input[i, j, k, l] = val; + val += 0.01; + } + } + } + } + + // cout << "Test input: " << endl; + // test_input.print(); + // cout << endl; + + // cout << "Test weight: " << endl; + // test_weight.print(); + // cout << endl; + + // cout << "Test bias: " << endl; + // test_bias.print(); + // cout << endl; + + // cout << "Test weight 2: " << endl; + // test_weight_2.print(); + // cout << endl; + + // cout << "Test bias 2: " << endl; + // test_bias_2.print(); + // cout << endl; + + Conv2d conv2d_1(in_channels, out_channels, weight_size, padding, stride, dilation, padding_mode, use_bias); + Conv2d conv2d_2(out_channels, out_channels_2, weight_size_2, padding_2, stride_2, dilation_2, padding_mode_2, use_bias_2); + Flatten flatten; + CrossEntropyLoss cross_entropy; + + conv2d_1.set_weight(test_weight); + conv2d_1.set_bias(test_bias); + + conv2d_2.set_weight(test_weight_2); + conv2d_2.set_bias(test_bias_2); + + // cout << "Test input: " << endl; + // test_input.print(); + // cout << endl; + + Tensor<> output = conv2d_1(test_input); + Tensor<> output_2 = conv2d_2(output); + + cout << "Output: " << endl; + output.print(); + cout << endl; + + cout << "Output 2: " << endl; + output_2.print(); + cout << endl; + + cout << "output shape : "; + for (int i = 0; i < output.ndim(); i++) + { + cout << output.shapes()[i] << " "; + } + cout << endl; + + cout << "output_2 shape : "; + for (int i = 0; i < output_2.ndim(); i++) + { + cout << output_2.shapes()[i] << " "; + } + cout << endl; + + Tensor<> flattened_output_2 = flatten(output_2); + + cout << "Flattened output 2: " << endl; + flattened_output_2.print(); + cout << endl; + + Linear linear(flattened_output_2.shapes()[1], out_features, false); + + Tensor<> test_linear_weight({flattened_output_2.shapes()[1], out_features}, 0.0); + + val = 0.01; + for (size_t i = 0; i < flattened_output_2.shapes()[1]; ++i) + { + for (size_t j = 0; j < out_features; ++j) + { + test_linear_weight[i, j] = val; + val += 0.01; + } + } + + cout << "linear in features: " << flattened_output_2.shapes()[1] << endl; + cout << "linear out features: " << out_features << endl; + + linear.set_weight(test_linear_weight); + + cout << "Test linear weight: " << endl; + test_linear_weight.print(); + cout << endl; + + Tensor<> output_3 = linear(flattened_output_2); + + cout << "Output 3: " << endl; + output_3.print(); + cout << endl; + + 
Tensor<> labels({batch_size}, 0); + + val = 1; + for (size_t i = 0; i < batch_size; i++) + { + labels[i] = val; + val++; + } + + // output.print(); + // output_2.print(); + // output_3.print(); + + output_3 /= 1e6; + + double loss = cross_entropy(output_3, labels); + + cout << "Loss: " << loss << endl; + + Tensor<> dL_dV_2 = cross_entropy.backward(); + Tensor<> dL_dV_1 = linear.backward(dL_dV_2); + Tensor<> dL_dZ = flatten.backward(dL_dV_1); + + cout << "dL/dZ: " << endl; + dL_dZ.print(); + cout << endl; + + Tensor<> dL_dY = conv2d_2.backward(dL_dZ); + Tensor<> dL_dX = conv2d_1.backward(dL_dY); + + cout << "dL_dY: " << endl; + dL_dY.print(); + cout << endl; + + cout << "dL_dX: " << endl; + dL_dX.print(); + cout << endl; + + return 0; +} \ No newline at end of file diff --git a/include/core/loss.hpp b/include/core/loss.hpp index d35d8de..4f3eb14 100644 --- a/include/core/loss.hpp +++ b/include/core/loss.hpp @@ -4,6 +4,7 @@ namespace nn { class Loss { public: + Loss() = default; virtual ~Loss() = default; virtual double forward(const Tensor<>& Y_hat, const Tensor<>& Y) = 0; virtual Tensor<> backward() = 0; @@ -11,7 +12,6 @@ class Loss { protected: - Tensor<> grad_output_; Tensor<> Y_cache_; Tensor<> Y_hat_cache_; }; diff --git a/include/core/module.hpp b/include/core/module.hpp index 986cf16..ea7b266 100644 --- a/include/core/module.hpp +++ b/include/core/module.hpp @@ -10,6 +10,7 @@ namespace nn { class Module { public: + Module() = default; /** * Virtual destructor for the Module class. */ diff --git a/include/core/tensor.hpp b/include/core/tensor.hpp index 8310ced..00f201f 100644 --- a/include/core/tensor.hpp +++ b/include/core/tensor.hpp @@ -1,906 +1,1274 @@ #pragma once -#include -#include #include "tensor_utils.hpp" using namespace std; - template -class Tensor { - private: - shared_ptr> data_ = make_shared>(); // data is stored as a 1D vector // shared_ptr is used to avoid copying data - vector shapes_; // store the dimensions of the tensor - vector strides_; // store the strides of the tensor - size_t offset_ = 0; // offset for slicing - - // Helper function to calculate the index in the 1D vector for a given set of indices expressed in the form of N-D vector - size_t calculate_idx(const vector& idxs) const { - size_t idx = this->offset_; - for (size_t i = 0; i < idxs.size(); ++i) { - idx += idxs[i] * this->strides_[i]; - } - return idx; +class Tensor +{ +private: + shared_ptr> data_ = nullptr; // data is stored as a 1D vector // shared_ptr is used to avoid copying data + vector shape_; // store the dimensions of the tensor + vector strides_; // store the strides of the tensor + size_t offset_ = 0; // offset for slicing + mutable int64_t size_ = -1; // it can be changed by const member functions (in size() function) + + // Helper function to calculate the index in the 1D vector for a given set of indices expressed in the form of N-D vector + size_t calculate_idx(const vector &idxs) const + { + size_t idx = this->offset_; + for (size_t i = 0; i < idxs.size(); ++i) + { + idx += idxs[i] * this->strides_[i]; + } + return idx; + } + + // Helper function for printing since we don't know the number of dimensions + void print_recursive_impl(size_t dim, size_t offset, int indent = 0) const + { + const string indent_str(indent, ' '); + + // Handle empty dimensions + if (this->shape_[dim] == 0) + { + cout << indent_str << "[]"; + return; } - // Helper function for printing since we don't know the number of dimensions - void print_recursive_impl(size_t dim, size_t offset, int indent = 0) const { 
- const string indent_str(indent, ' '); + cout << indent_str << "["; - // Handle empty dimensions - if (this->shapes_[dim] == 0) { - cout << indent_str << "[]"; - return; + if (dim == this->ndim() - 1) + { // Last dimension + for (size_t i = 0; i < this->shape_[dim]; ++i) + { + cout << (*this->data_)[offset + i * this->strides_[dim]]; + if (i < this->shape_[dim] - 1) + cout << ", "; } - - cout << indent_str << "["; - - if (dim == this->ndim() - 1) { // Last dimension - for (size_t i = 0; i < this->shapes_[dim]; ++i) { - cout << (*this->data_)[offset + i * this->strides_[dim]]; - if (i < this->shapes_[dim] - 1) cout << ", "; - } - } else { - cout << "\n"; - for (size_t i = 0; i < this->shapes_[dim]; ++i) { - print_recursive_impl(dim + 1, offset + i * this->strides_[dim], indent + 2); - if (i < this->shapes_[dim] - 1) cout << ",\n"; - } - cout << "\n" << indent_str; + } + else + { + cout << "\n"; + for (size_t i = 0; i < this->shape_[dim]; ++i) + { + print_recursive_impl(dim + 1, offset + i * this->strides_[dim], indent + 2); + if (i < this->shape_[dim] - 1) + cout << ",\n"; } - cout << "]"; + cout << "\n" + << indent_str; + } + cout << "]"; + } + + // Helper function for operator[] overloading + template + const vector get_idxs(Indices... indices) const + { + // Convert variadic arguments to vector + vector idxs({static_cast(indices)...}); + vector normalized_idxs; + + // for better performance, reserve the size of the vector + normalized_idxs.reserve(idxs.size()); + + // Check bounds + for (size_t i = 0; i < idxs.size(); ++i) + { + size_t normalized_idx = normalize_index(idxs[i], this->shape_[i]); + normalized_idxs.push_back(normalized_idx); } - // Helper function for operator[] overloading - template - const vector get_idxs(Indices... indices) const { - // Convert variadic arguments to vector - vector idxs({static_cast(indices)...}); - vector normalized_idxs; + return normalized_idxs; + } + + /** + * Reduces a 1D or 2D tensor along its rows using the specified reduction operation. + * + * @tparam U The data type of the resulting tensor. Defaults to the type of the current tensor. + * @param op The reduction operation to perform. Supported operations are MAX, MIN, ARGMAX, and ARGMIN. + * @return A Tensor of shape (num_rows, 1) containing the reduced values or indices. + * @throws runtime_error if the tensor's number of dimensions is greater than 2. + */ + + template + Tensor reduce_impl(ReduceOp op) const + { + const size_t ndim = this->ndim(); + + if (ndim > 2) + { + throw std::runtime_error("Only 1D and 2D tensors are supported for reduce"); + } - // for better performance, reserve the size of the vector - normalized_idxs.reserve(idxs.size()); + // Determine tensor dimensions + const size_t num_rows = (ndim == 2) ? this->shape_[0] : 1; + const size_t num_cols = (ndim == 2) ? this->shape_[1] : this->shape_[0]; - // Check bounds - for (size_t i = 0; i < idxs.size(); ++i) { - size_t normalized_idx = normalize_index(idxs[i], this->shapes_[i]); - normalized_idxs.push_back(normalized_idx); - } + vector result(num_rows); - return normalized_idxs; - } + for (size_t i = 0; i < num_rows; ++i) + { + // Calculate base offset for current row + size_t row_offset = this->offset_; + if (ndim == 2) + { + row_offset += i * this->strides_[0]; + } - /** - * Reduces a 1D or 2D tensor along its rows using the specified reduction operation. - * - * @tparam U The data type of the resulting tensor. Defaults to the type of the current tensor. - * @param op The reduction operation to perform. 
Supported operations are MAX, MIN, ARGMAX, and ARGMIN. - * @return A Tensor of shape (num_rows, 1) containing the reduced values or indices. - * @throws runtime_error if the tensor's number of dimensions is greater than 2. - */ + T extreme_val = (*this->data_)[row_offset]; + size_t extreme_idx = 0; - template - Tensor reduce_impl(ReduceOp op) const { - if (this->ndim() > 2) { - throw std::runtime_error("Only 1D and 2D tensors are supported for reduce"); - } + // Process elements using stride-aware indexing + for (size_t j = 1; j < num_cols; ++j) + { + size_t elem_offset = row_offset; + if (ndim == 2) + { + elem_offset += j * this->strides_[1]; + } + else + { + elem_offset += j * this->strides_[0]; + } - const size_t num_rows = (this->ndim() == 2)? this->shapes_[0] : 1; - const size_t num_cols = (this->ndim() == 2)? this->shapes_[1] : this->shapes_[0]; - - - // Result will be a tensor of shape (num_rows, 1) - vector result(num_rows); - - for (size_t i = 0; i < num_rows; i++) { - size_t start_idx = i * num_cols; - - // Initialize with first element in row - T extreme_val = (*this->data_)[start_idx]; - size_t extreme_idx = 0; - - // Process remaining elements in the row - for (size_t j = 1; j < num_cols; j++) { - size_t curr_idx = start_idx + j; - bool update = false; - - switch (op) { - case ReduceOp::MAX: - case ReduceOp::ARGMAX: - update = (*this->data_)[curr_idx] > extreme_val; - break; - case ReduceOp::MIN: - case ReduceOp::ARGMIN: - update = (*this->data_)[curr_idx] < extreme_val; - break; - } - - if (update) { - extreme_val = (*this->data_)[curr_idx]; - extreme_idx = j; - } + bool update = false; + switch (op) + { + case ReduceOp::MAX: + case ReduceOp::ARGMAX: + update = (*this->data_)[elem_offset] > extreme_val; + break; + case ReduceOp::MIN: + case ReduceOp::ARGMIN: + update = (*this->data_)[elem_offset] < extreme_val; + break; } - - // Store the result - switch (op) { - case ReduceOp::MAX: - case ReduceOp::MIN: - result[i] = extreme_val; - break; - case ReduceOp::ARGMAX: - case ReduceOp::ARGMIN: - result[i] = extreme_idx; - break; + + if (update) + { + extreme_val = (*this->data_)[elem_offset]; + extreme_idx = j; } } - - return Tensor(result); - } - Tensor arithmetic_operation_impl(ArithmeticOp op, const Tensor& other) const { - if (other.shapes_ != this->shapes_) { - throw runtime_error("Shape mismatch in arithmetic operation"); + switch (op) + { + case ReduceOp::MAX: + case ReduceOp::MIN: + result[i] = static_cast(extreme_val); + break; + case ReduceOp::ARGMAX: + case ReduceOp::ARGMIN: + result[i] = static_cast(extreme_idx); + break; } + } - size_t ndim = this->ndim(); - - Tensor result(this->shapes_, static_cast(0)); - - // Precompute result's contiguous strides for index calculation - const vector& result_strides = result.strides_; - - for (size_t i = 0; i < this->size(); i++) { - auto [a_offset, b_offset] = calculate_tensors_offsets(i, ndim, result_strides, other); - - switch (op) { - case ArithmeticOp::ADD: - (*result.data_)[i] = (*this->data_)[a_offset] + (*other.data_)[b_offset]; - break; - case ArithmeticOp::SUB: - (*result.data_)[i] = (*this->data_)[a_offset] - (*other.data_)[b_offset]; - break; - case ArithmeticOp::MUL: - (*result.data_)[i] = (*this->data_)[a_offset] * (*other.data_)[b_offset]; - break; - case ArithmeticOp::DIV: - (*result.data_)[i] = (*this->data_)[a_offset] / (*other.data_)[b_offset]; - break; - } + return Tensor(result); + } + + Tensor arithmetic_operation_impl(ArithmeticOp op, const Tensor &other) const + { + if (other.shape_ != this->shape_) + { + 
throw runtime_error("Shape mismatch in arithmetic operation"); + } + + size_t ndim = this->ndim(); + + Tensor result(this->shape_, static_cast(0)); + + // Precompute result's contiguous strides for index calculation + const vector &result_strides = result.strides_; + + for (size_t i = 0; i < this->size(); i++) + { + auto [a_offset, b_offset] = calculate_tensors_offsets(i, ndim, result_strides, other); + + switch (op) + { + case ArithmeticOp::ADD: + (*result.data_)[i] = (*this->data_)[a_offset] + (*other.data_)[b_offset]; + break; + case ArithmeticOp::SUB: + (*result.data_)[i] = (*this->data_)[a_offset] - (*other.data_)[b_offset]; + break; + case ArithmeticOp::MUL: + (*result.data_)[i] = (*this->data_)[a_offset] * (*other.data_)[b_offset]; + break; + case ArithmeticOp::DIV: + (*result.data_)[i] = (*this->data_)[a_offset] / (*other.data_)[b_offset]; + break; } - return result; } + return result; + } - // Helper function to cacluate the stride of the tensor - void calculate_strides() { - this->strides_.resize(this->ndim(), 0); - vector strides(this->ndim()); + // Helper function to cacluate the stride of the tensor + void compute_contiguous_strides() + { + this->strides_.resize(this->ndim(), 0); - int64_t stride = 1; + int64_t stride = 1; - for (int64_t i = this->ndim() - 1; i >= 0; --i) { - this->strides_[i] = stride; - stride *= this->shapes_[i]; - } + for (int64_t i = this->ndim() - 1; i >= 0; --i) + { + this->strides_[i] = stride; + stride *= this->shape_[i]; } + } - std::tuple calculate_tensors_offsets(const size_t idx, const size_t ndim, const vector& result_strides, const Tensor& other) const { - vector indices(ndim); + std::tuple calculate_tensors_offsets(const size_t idx, const size_t ndim, const vector &result_strides, const Tensor &other) const + { + vector indices(ndim); - size_t remaining = idx; + size_t remaining = idx; - for (int dim = 0; dim < ndim; ++dim) { - indices[dim] = remaining / result_strides[dim]; - remaining %= result_strides[dim]; - } + for (int64_t dim = 0; dim < ndim; ++dim) + { + indices[dim] = remaining / result_strides[dim]; + remaining %= result_strides[dim]; + } - // Calculate offsets using original tensors' strides - size_t a_offset = this->offset_; - size_t b_offset = other.offset_; + // Calculate offsets using original tensors' strides + size_t a_offset = this->offset_; + size_t b_offset = other.offset_; - for (int dim = 0; dim < ndim; ++dim) { - a_offset += indices[dim] * this->strides_[dim]; - b_offset += indices[dim] * other.strides_[dim]; - } + for (int64_t dim = 0; dim < ndim; ++dim) + { + a_offset += indices[dim] * this->strides_[dim]; + b_offset += indices[dim] * other.strides_[dim]; + } - return {a_offset, b_offset}; - } - - // Declare friendship so that TensorView can access private members of Tensor - template - friend Tensor dtype_impl(const Tensor& tensor); - - public: - Tensor() = default; - - // Helper to recursively flatten nested vectors and compute shapes - template - void flatten_vector(const std::vector& vec, size_t depth = 0) { - // Add current level's size to shapes - if (depth == this->shapes_.size()) { - // First encounter with this depth: record size - this->shapes_.push_back(vec.size()); - - } - else { - // Verify size matches the existing dimension - if (vec.size() != this->shapes_[depth]) { - throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); - } + return {a_offset, b_offset}; + } + + // Helper to recursively flatten nested vectors and compute shapes + template + void flatten_vector(const 
std::vector &vec, size_t depth = 0) + { + // Add current level's size to shapes + if (depth == this->shape_.size()) + { + // First encounter with this depth: record size + this->shape_.push_back(vec.size()); + } + else + { + // Verify size matches the existing dimension + if (vec.size() != this->shape_[depth]) + { + throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); } + } - if constexpr (is_vector::value) { - // Ensure nested vectors have consistent sizes at this level - if (!vec.empty()) { - size_t expected_size = vec[0].size(); - for (const auto& elem : vec) { - if (elem.size() != expected_size) { - throw std::invalid_argument("Inconsistent shape in nested vectors"); - } + if constexpr (is_vector::value) + { + // Ensure nested vectors have consistent sizes at this level + if (!vec.empty()) + { + size_t expected_size = vec[0].size(); + for (const auto &elem : vec) + { + if (elem.size() != expected_size) + { + throw std::invalid_argument("Inconsistent shape in nested vectors"); } } + } - // Recurse into nested vectors - for (const auto& elem : vec) { - flatten_vector(elem, depth + 1); - } - } else { - // Ensure leaf elements match the Tensor's data type - // static_assert(std::is_same_v, "Element type must match Tensor type"); - for (const auto& elem : vec) { - this->data_->push_back(static_cast(elem)); - } + // Recurse into nested vectors + for (const auto &elem : vec) + { + flatten_vector(elem, depth + 1); } } - - // Constructor for nested vectors - template - Tensor(const std::vector& input) { - flatten_vector(input); - this->calculate_strides(); - } - - // // Recursive helper to process nested initializer lists - // template - // void flatten_list(const std::initializer_list& list, size_t depth = 0) { - // // Handle the current dimension - // if (depth == shapes_.size()) { - // // First encounter with this depth: record size - // shapes_.push_back(list.size()); - // } else { - // // Verify size matches the existing dimension - // if (list.size() != shapes_[depth]) { - // throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); - // } - // } - - // // Recurse or add data - // if constexpr (is_list::value) { - // // Process nested lists - // for (const auto& elem : list) { - // flatten_list(elem, depth + 1); - // } - // } else { - // // Ensure element type matches Tensor type - // // static_assert(std::is_same_v, "Element type must match Tensor type"); - // for (const auto& elem : list) { - // data_.push_back(static_cast(elem)); - // } - // } - // } - - - // Scaler constructor - Tensor(const T& value) { - this->shapes_ = vector {1}; - this->data_ = make_shared>(1, value); - this->calculate_strides(); + else + { + // Ensure leaf elements match the Tensor's data type + // static_assert(std::is_same_v, "Element type must match Tensor type"); + this->data_->reserve(this->data_->size() + vec.size()); + for (const auto &elem : vec) + { + this->data_->emplace_back(static_cast(elem)); + } } - - // 1D tensor constructor - Tensor(const initializer_list& data_1d) { - this->data_ = make_shared>(data_1d.begin(), data_1d.end()); - this->shapes_ = vector { data_1d.size() }; - this->calculate_strides(); + } + + // Declare friendship so that TensorView can access private members of Tensor + template + friend Tensor dtype_impl(const Tensor &tensor); + +public: + Tensor() = default; + + // Constructor for nested vectors + template + Tensor(const std::vector &input) + { + this->data_ = make_shared>(); + flatten_vector(input); + 
this->compute_contiguous_strides(); + } + + // // Recursive helper to process nested initializer lists + // template + // void flatten_list(const std::initializer_list& list, size_t depth = 0) { + // // Handle the current dimension + // if (depth == shapes_.size()) { + // // First encounter with this depth: record size + // shapes_.push_back(list.size()); + // } else { + // // Verify size matches the existing dimension + // if (list.size() != shapes_[depth]) { + // throw std::invalid_argument("Inconsistent shape at depth " + std::to_string(depth)); + // } + // } + + // // Recurse or add data + // if constexpr (is_list::value) { + // // Process nested lists + // for (const auto& elem : list) { + // flatten_list(elem, depth + 1); + // } + // } else { + // // Ensure element type matches Tensor type + // // static_assert(std::is_same_v, "Element type must match Tensor type"); + // for (const auto& elem : list) { + // data_.push_back(static_cast(elem)); + // } + // } + // } + + // Scaler constructor + Tensor(const T &value) + { + this->shape_ = vector{1}; + this->data_ = make_shared>(1, value); + this->compute_contiguous_strides(); + } + + // 1D tensor constructor + Tensor(const initializer_list &data_1d) + { + this->data_ = make_shared>(data_1d.begin(), data_1d.end()); + this->shape_ = vector{data_1d.size()}; + this->compute_contiguous_strides(); + } + + // 2D tensor constructor + Tensor(const initializer_list> &data_2d) + { + const size_t n = data_2d.size(), m = data_2d.begin()->size(); + + this->shape_ = vector{n, m}; + + this->data_ = make_shared>(); + this->data_->reserve(n * m); // Optimize memory allocation + + for (const initializer_list &row : data_2d) + { + this->data_->insert(this->data_->end(), row.begin(), row.end()); } + this->compute_contiguous_strides(); + } - // 2D tensor constructor - Tensor(const initializer_list>& data_2d) { - const size_t n = data_2d.size(), m = data_2d.begin()->size(); + // 3D tensor constructor + Tensor(const initializer_list>> &data_3d) + { + const size_t n = data_3d.size(), m = data_3d.begin()->size(), l = data_3d.begin()->begin()->size(); - this->shapes_ = vector { n, m }; + this->shape_ = vector{n, m, l}; - this->data_->reserve(n * m); // Optimize memory allocation + this->data_ = make_shared>(); + this->data_->reserve(n * m * l); // Optimize memory allocation - for (const initializer_list& row : data_2d) { + for (const initializer_list> &matrix : data_3d) + { + for (const initializer_list &row : matrix) + { this->data_->insert(this->data_->end(), row.begin(), row.end()); } - this->calculate_strides(); } + this->compute_contiguous_strides(); + } - // 3D tensor constructor - Tensor(const initializer_list>>& data_3d) { - const size_t n = data_3d.size(), m = data_3d.begin()->size(), l = data_3d.begin()->begin()->size(); + // 4D tensor constructor + Tensor(const initializer_list>>> &data_4d) + { + const size_t n = data_4d.size(), m = data_4d.begin()->size(), l = data_4d.begin()->begin()->size(), k = data_4d.begin()->begin()->begin()->size(); - this->shapes_ = vector { n, m, l }; + this->shape_ = vector{n, m, l, k}; - this->data_->reserve(n * m * l); // Optimize memory allocation + this->data_ = make_shared>(); + this->data_->reserve(n * m * l * k); // Optimize memory allocation - for (const initializer_list>& matrix : data_3d) { - for (const initializer_list& row : matrix) { + for (const initializer_list>> &tensor : data_4d) + { + for (const initializer_list> &matrix : tensor) + { + for (const initializer_list &row : matrix) + { 
this->data_->insert(this->data_->end(), row.begin(), row.end()); } } - this->calculate_strides(); + } + this->compute_contiguous_strides(); + } + + // certain value constructor + Tensor(const vector &shape, const T &value) + { + this->shape_ = shape; + size_t size = 1; + for (const size_t &dim : shape) + { + size *= dim; } - // 4D tensor constructor - Tensor(const initializer_list>>>& data_4d) { - const size_t n = data_4d.size(), m = data_4d.begin()->size(), l = data_4d.begin()->begin()->size(), k = data_4d.begin()->begin()->begin()->size(); - - this->shapes_ = vector { n, m, l, k }; + this->data_ = make_shared>(size, value); + this->compute_contiguous_strides(); + } + + // copy constructor + Tensor(const Tensor &other) + { + // the = operator is already overloaded + *this = other; + } + + // template + // Tensor(const Tensor &other) + // { + // // use dtype function to convert the data type + // *this = other.dtype<>(); + // } + + // Add two tensors with same shape, element-wise + inline Tensor add(const Tensor &other) const + { + return arithmetic_operation_impl(ArithmeticOp::ADD, other); + } + + // Subtract two tensors with same shape, element-wise + inline Tensor sub(const Tensor &other) const + { + return arithmetic_operation_impl(ArithmeticOp::SUB, other); + } + + // Multiply two tensors with same shape, element-wise + inline Tensor mul(const Tensor &other) const + { + return arithmetic_operation_impl(ArithmeticOp::MUL, other); + } + + // Divide two tensors with same shape, element-wise + inline Tensor div(const Tensor &other) const + { + return arithmetic_operation_impl(ArithmeticOp::DIV, other); + } + + // Multiply all elements of the tensor by the given scalar + Tensor mul(const T &scaler) const + { + Tensor result = *this; + + for (size_t i = 0; i < this->size(); i++) + { + (*result.data_)[i] *= scaler; + } + return result; + } - this->data_->reserve(n * m * l * k); // Optimize memory allocation + // Divide all elements of the tensor by the given scalar + Tensor div(const T &scaler) const + { + Tensor result = *this; - for (const initializer_list>>& tensor : data_4d) { - for (const initializer_list>& matrix : tensor) { - for (const initializer_list& row : matrix) { - this->data_->insert(this->data_->end(), row.begin(), row.end()); - } - } - } - this->calculate_strides(); + for (size_t i = 0; i < this->size(); i++) + { + (*result.data_)[i] /= scaler; } - - // certin value constructor - Tensor(const vector& shape, const T& value) { - this->shapes_ = shape; - size_t size = 1; - for (const size_t& dim : shape) { - size *= dim; - } - - this->data_->resize(size, value); - this->calculate_strides(); + return result; + } + + /** + * Matrix multiplication of two tensors. + * + * The two tensors must have at least two dimensions. The leading dimensions (all except last two) must be equal. + * The last two dimensions must match the matrix multiplication dimensions. + * For example, if the first tensor has shape [a, b, n, m] and the second tensor has shape [a, b, m, p], the result will have shape [a, b, n, p]. + * + * The result is a tensor with the leading dimensions of the first tensor and the matrix multiplication result as the last two dimensions. + * + * The total number of batches is the product of the leading dimensions. + * + * The matrix multiplication is performed batched, i.e., for each batch, a matrix multiplication is performed. + * + * @param other The tensor to multiply with. + * @return The result of the matrix multiplication.
+ */ + Tensor matmul(const Tensor &other) const + { + // Ensure both tensors have at least 2 dimensions + size_t A_ndim = this->ndim(), B_ndim = other.ndim(); + + if (A_ndim < 2 || B_ndim < 2) + { + throw std::runtime_error("Tensors must have at least 2 dimensions for matrix multiplication"); } - // copy constructor - Tensor(const Tensor& other) { - *this = other; - } + // Check leading dimensions (all except last two) are equal + const size_t A_leading_ndim = A_ndim - 2; + const size_t B_leading_ndim = B_ndim - 2; - // Add two tensors with same shape, element-wise - inline Tensor add(const Tensor& other) const { - return arithmetic_operation_impl(ArithmeticOp::ADD, other); + if (A_leading_ndim != B_leading_ndim) + { + throw std::runtime_error("Number of leading dimensions must match"); } - // Subtract two tensors with same shape, element-wise - inline Tensor sub(const Tensor& other) const { - return arithmetic_operation_impl(ArithmeticOp::SUB, other); - } + vector A_leading_shape(this->shape_.begin(), this->shape_.end() - 2); + vector B_leading_shape(other.shape_.begin(), other.shape_.end() - 2); - // Multiply two tensors with same shape, element-wise - inline Tensor mul(const Tensor& other) const { - return arithmetic_operation_impl(ArithmeticOp::MUL, other); + if (A_leading_shape != B_leading_shape) + { + throw invalid_argument("Batch dimensions must match"); } - // Divide two tensors with same shape, element-wise - inline Tensor div(const Tensor& other) const { - return arithmetic_operation_impl(ArithmeticOp::DIV, other); + // Extract matrix dimensions + const size_t n = this->shape_[A_ndim - 2]; + const size_t m = this->shape_[A_ndim - 1]; + const size_t m_other = other.shape_[B_ndim - 2]; + const size_t p = other.shape_[B_ndim - 1]; + + if (m != m_other) + { + throw std::invalid_argument("Matrix dimension mismatch: last dimension of first tensor must match second last of second tensor"); } - // Multiply all elements of tensor with the given scaler - Tensor mul(const T& scaler) const { - Tensor result(this->shapes_, static_cast(0)); + // Determine result shape: leading dimensions + [n, p] + vector result_shapes = A_leading_shape; + result_shapes.push_back(n); + result_shapes.push_back(p); - for (size_t i = 0; i < this->size(); i++) { - (*result.data_)[i] = (*this->data_)[i] * scaler; - } - return result; - } - - - /** - * Matrix multiplication of two tensors. - * - * The two tensors must have at least two dimensions. The leading dimensions (all except last two) must be equal. - * The last two dimensions must match the matrix multiplication dimensions. - * For example, if the first tensor has shape [a, b, n, m] and the second tensor has shape [a, b, m, p], the result will have shape [a, b, n, p]. - * - * The result is a tensor with the leading dimensions of the first tensor and the matrix multiplication result as the last two dimensions. - * - * The total number of batches is the product of the leading dimensions. - * - * The matrix multiplication is performed batched, i.e., for each batch, a matrix multiplication is performed. - * - * @param other The tensor to multiply with. - * @return The result of the matrix multiplication. 
- */ - Tensor matmul(const Tensor& other) const { - // Ensure both tensors have at least 2 dimensions - size_t A_ndim = this->ndim(), B_ndim = other.ndim(); - - if (A_ndim < 2 || B_ndim < 2) { - throw std::runtime_error("Tensors must have at least 2 dimensions for matrix multiplication"); - } + Tensor result(result_shapes, static_cast(0)); - // Check leading dimensions (all except last two) are equal - const size_t A_leading_ndim = A_ndim - 2; - const size_t B_leading_ndim = B_ndim - 2; + // Compute total number of batches (product of leading dimensions) + // may be we can use divisoin in stride to have O(1) time + size_t total_batches = 1; + for (const size_t &dim : A_leading_shape) + { + total_batches *= dim; + } - if (A_leading_ndim != B_leading_ndim) { - throw std::runtime_error("Number of leading dimensions must match"); + for (size_t batch = 0; batch < total_batches; ++batch) + { + // Get multi_dimensional indices for this batch + vector indices = linear_to_multi_idxs(batch, A_leading_shape); + + // Compute offsets for A, B, and result + size_t A_offset = this->offset_; + size_t B_offset = other.offset_; + size_t result_offset = 0; + + for (size_t i = 0; i < A_leading_ndim; ++i) + { + A_offset += indices[i] * this->strides_[i]; + B_offset += indices[i] * other.strides_[i]; + result_offset += indices[i] * result.strides_[i]; } - vector A_leading_shape(this->shapes_.begin(), this->shapes_.end() - 2); - vector B_leading_shape(other.shapes_.begin(), other.shapes_.end() - 2); + for (size_t i = 0; i < n; ++i) + { + for (size_t j = 0; j < p; ++j) + { + T sum = static_cast(0); - if (A_leading_shape != B_leading_shape) { - throw invalid_argument("Batch dimensions must match"); - } + for (size_t k = 0; k < m; ++k) + { + // Calculate offsets in A and B + size_t a_idx = A_offset + + i * this->strides_[A_leading_ndim] + + k * this->strides_[A_leading_ndim + 1]; - // Extract matrix dimensions - const size_t n = this->shapes_[A_ndim - 2]; - const size_t m = this->shapes_[A_ndim - 1]; - const size_t m_other = other.shapes_[B_ndim - 2]; - const size_t p = other.shapes_[B_ndim - 1]; + size_t b_idx = B_offset + + k * other.strides_[B_leading_ndim] + + j * other.strides_[B_leading_ndim + 1]; - if (m != m_other) { - throw std::invalid_argument("Matrix dimension mismatch: last dimension of first tensor must match second last of second tensor"); + sum += (*this->data_)[a_idx] * (*other.data_)[b_idx]; + } + // Write to result + size_t out_idx = result_offset + + i * result.strides_[result.ndim() - 2] + + j * result.strides_.back(); + (*result.data_)[out_idx] = sum; + } } + } - // Determine result shape: leading dimensions + [n, p] - vector result_shapes = A_leading_shape; - result_shapes.push_back(n); - result_shapes.push_back(p); + return result; + } - Tensor result(result_shapes, static_cast(0)); + /// @brief Transpose the tensor. + /// @details This function supports transposing 1D and 2D tensors. + /// 1D tensors are transposed from shape (1, n) to (n, 1). + /// For 2D tensors, it swaps rows and columns. + /// @return A new tensor that is the transpose of the original tensor. + /// @throws runtime_error if the tensor has more than 2 dimensions. 
- // Compute total number of batches (product of leading dimensions) - // may be we can use divisoin in stride to have O(1) time - size_t total_batches = 1; - for (const size_t& dim: A_leading_shape) { - total_batches *= dim; - } + Tensor transpose(int64_t dim0 = -2, int64_t dim1 = -1) const + { + const size_t ndim = this->ndim(); - for (size_t batch = 0; batch < total_batches; ++batch) { - // Get multi_dimensional indices for this batch - vector indices = linear_to_multi_idxs(batch, A_leading_shape); + if (ndim == 1 && dim0 == -2 && dim1 == -1) + { + Tensor result = *this; + return result.reshape({this->size(), 1}); + } - // Compute offsets for A, B, and result - size_t A_offset = this->offset_; - size_t B_offset = other.offset_; - size_t result_offset = 0; + if (dim0 == dim1) + { + return *this; // No-op if dimensions are the same + } - for (size_t i = 0; i < A_leading_ndim; ++i) { - A_offset += indices[i] * this->strides_[i]; - B_offset += indices[i] * other.strides_[i]; - result_offset += indices[i] * result.strides_[i]; - } + if (dim0 < 0) + { + dim0 += ndim; + } - for (size_t i = 0; i < n; ++i) { - for (size_t j = 0; j < p; ++j) { - T sum = static_cast(0); - - for (size_t k = 0; k < m; ++k) { - // Calculate offsets in A and B - size_t a_idx = A_offset + - i * this->strides_[A_leading_ndim] + - k * this->strides_[A_leading_ndim + 1]; - - size_t b_idx = B_offset + - k * other.strides_[B_leading_ndim] + - j * other.strides_[B_leading_ndim + 1]; - - sum += (*this->data_)[a_idx] * (*other.data_)[b_idx]; - } - // Write to result - size_t out_idx = result_offset + - i * result.strides_[result.ndim() - 2] + - j * result.strides_.back(); - (*result.data_)[out_idx] = sum; - } - } - } + if (dim1 < 0) + { + dim1 += ndim; + } - return result; + if (dim0 < 0 || dim0 >= ndim || dim1 < 0 || dim1 >= ndim) + { + throw out_of_range("Transpose dimensions out of range"); } - - /// @brief Transpose the tensor. - /// @details This function supports transposing 1D and 2D tensors. - /// 1D tensors are transposed from shape (1, n) to (n, 1). - /// For 2D tensors, it swaps rows and columns. - /// @return A new tensor that is the transpose of the original tensor. - /// @throws runtime_error if the tensor has more than 2 dimensions. + // Create new tensor with swapped dimensions + Tensor result = *this; + swap(result.shape_[dim0], result.shape_[dim1]); + swap(result.strides_[dim0], result.strides_[dim1]); - Tensor transpose(int64_t dim0=-2, int64_t dim1=-1) const { - const size_t ndim = this->ndim(); + // cout << "result.shapes_: " << result.shapes_[0] << " " << result.shapes_[1] << endl; + // cout << "result.strides_: " << result.strides_[0] << " " << result.strides_[1] << endl; - if (ndim == 1 && dim0 == -2 && dim1 == -1) { - Tensor result = *this; - result.reshape({this->size(), 1}); - return result; - } + return result; + } - if (dim0 == dim1) { - return *this; // No-op if dimensions are the same - } + template + Tensor permute(Dims... 
dims) const + { + vector perm_dims = {static_cast(dims)...}; - if (dim0 < 0) { - dim0 += ndim; - } + size_t ndim = this->ndim(); - if (dim1 < 0) { - dim1 += ndim; - } + if (perm_dims.size() != ndim) + { + throw std::invalid_argument("Number of dimensions in permutation must match tensor's number of dimensions"); + } - if (dim0 < 0 || dim0 >= ndim || dim1 < 0 || dim1 >= ndim) { - throw out_of_range("Transpose dimensions out of range"); + unordered_set seen_dims; + for (size_t dim : perm_dims) + { + if (dim >= ndim) + { + throw out_of_range("Permute dimension out of range"); + } + if (seen_dims.count(dim)) + { + throw invalid_argument("Duplicate dimension in permute"); } + seen_dims.insert(dim); + } - // Create new tensor with swapped dimensions - Tensor result = *this; - swap(result.shapes_[dim0], result.shapes_[dim1]); - swap(result.strides_[dim0], result.strides_[dim1]); + vector new_shapes(ndim); + vector new_strides(ndim); - cout << "result.shapes_: " << result.shapes_[0] << " " << result.shapes_[1] << endl; - cout << "result.strides_: " << result.strides_[0] << " " << result.strides_[1] << endl; + size_t i = 0; + for (size_t dim : perm_dims) + { + if (dim >= ndim) + { + throw std::out_of_range("Permutation dimension out of range"); + } - return result; + new_shapes[i] = this->shape_[dim]; + new_strides[i] = this->strides_[dim]; + ++i; } - /// @brief Flatten the tensor into 1D in-place. - /// @details This function only changes the shape of the tensor, and does not modify the underlying data. - /// @post The shape of the tensor is changed to 1D, with the same elements as the original tensor. - void flatten() { - this->shapes_ = { this->size() }; - this->calculate_strides(); - return; + Tensor result = *this; + result.shape_ = new_shapes; + result.strides_ = new_strides; + + return result; + } + + /** + * Flattens the dimensions of the tensor from start_dim to end_dim into a single dimension. + * + * This function collapses multiple dimensions of the tensor into one, effectively reducing + * the number of dimensions by merging the specified range of dimensions. If start_dim or + * end_dim is negative, it will be counted from the last dimension backwards. The resulting + * tensor will have the same total number of elements as the original tensor. + * + * @param start_dim The starting dimension index to begin flattening. Defaults to 0. + * @param end_dim The ending dimension index to stop flattening. Defaults to -1, which + * refers to the last dimension. + * @return A new tensor with the specified dimensions flattened. + * + * @throws std::invalid_argument if start_dim is greater than end_dim. + * @throws std::out_of_range if start_dim or end_dim is out of the range of the tensor's dimensions. 
+ */ + + Tensor<> flatten(int64_t start_dim = 0, int64_t end_dim = -1) const + { + if (start_dim < 0) + { + start_dim += this->ndim(); + } + if (end_dim < 0) + { + end_dim += this->ndim(); } - /// @brief Calculate the absolute value of each element in the tensor - /// @return a new tensor with the same shape as the original, but with each element replaced by its absolute value - Tensor abs() const { - Tensor result(this->shapes_, static_cast(0)); - - for (size_t i = 0; i < this->size(); i++) { - (*result.data_)[i] = std::abs((*this->data_)[i]); - } + if (start_dim > end_dim) + { + throw invalid_argument("Start dimension must be less than or equal to end dimension"); + } - return result; + if (start_dim < 0 || start_dim >= this->ndim() || end_dim < 0 || end_dim >= this->ndim()) + { + throw out_of_range("Flatten dimensions out of range"); } - /// @brief Filter the tensor with the given function - /// @param func a function to test each element of the tensor. It should return true if the element passes the test - /// @return a new tensor with the same shape as the original, but all elements that fail the test are set to 0. - Tensor filter(bool (*func)(T)) const { - Tensor result(this->shapes_, static_cast(0)); + vector new_shape; + new_shape.reserve(this->ndim() - (end_dim - start_dim + 1) + 1); - for (size_t i = 0; i < this->size(); i++) { - if (func((*this->data_)[i])) { - (*result.data_)[i] = (*this->data_)[i]; - } + for (size_t i = 0; i < this->ndim(); ++i) + { + if (i <= start_dim || i > end_dim) + { + new_shape.push_back(this->shape_[i]); + } + else + { + new_shape[new_shape.size() - 1] *= this->shape_[i]; } - - return result; } - /// @brief Perform element-wise transformation with a function - /// @param func a function to perform element-wise transformation to the tensor - /// @return a new tensor with the same shape as the original, but with each element transformed by the given func - Tensor map(T (*func)(T)) const { - Tensor result(this->shapes_, static_cast(0)); + return this->reshape(new_shape); + } - for (size_t i = 0; i < this->size(); i++) { - (*result.data_)[i] = func((*this->data_)[i]); - } + /// @brief Calculate the absolute value of each element in the tensor + /// @return a new tensor with the same shape as the original, but with each element replaced by its absolute value + Tensor abs() const + { + Tensor result = *this; - return result; + for (size_t i = 0; i < this->size(); i++) + { + (*result.data_)[i] = std::abs((*this->data_)[i]); } - /// @brief Calculate the sum of all elements in the tensor - /// @return The sum of all elements in the tensor, regardless of the dimension - T sum() const { - T sum = static_cast(0); - - for (size_t i = 0; i < this->size(); i++) { - sum += (*this->data_)[i]; + return result; + } + + /// @brief Filter the tensor with the given function + /// @param func a function to test each element of the tensor. It should return true if the element passes the test + /// @return a new tensor with the same shape as the original, but all elements that fail the test are set to 0. 
+ Tensor filter(bool (*func)(T)) const + { + Tensor result = *this; + + for (size_t i = 0; i < this->size(); i++) + { + if (!func((*this->data_)[i])) + { + (*result.data_)[i] = static_cast(0); } - - return sum; } - /// @brief Check if all elements of two tensors are equal - /// @param other Tensor to compare - /// @return Tensor of integers where each element is 1 if the two tensors are equal at the same index, 0 otherwise - Tensor equal(const Tensor& other) const{ - if (other.shapes_ != this->shapes_) { - throw runtime_error("Shape mismatch"); - } - - Tensor result(this->shapes_, static_cast(0)); - const vector& result_strides = result.strides_; + return result; + } - for (size_t i = 0; i < this->size(); i++) { - auto [a_offset, b_offset] = calculate_tensors_offsets(i, this->ndim(), result_strides, other); + /// @brief Perform element-wise transformation with a function + /// @param func a function to perform element-wise transformation to the tensor + /// @return a new tensor with the same shape as the original, but with each element transformed by the given func + Tensor map(T (*func)(T)) const + { + Tensor result = *this; - (*result.data_)[i] = (*this->data_)[a_offset] == (*other.data_)[b_offset]; - } - - return result.dtype(); + for (size_t i = 0; i < this->size(); i++) + { + (*result.data_)[i] = func((*this->data_)[i]); } - /// @brief Check if all elements of two tensors are equal - /// @param other Tensor to compare - /// @return true if all elements are equal, false otherwise - bool compare(const Tensor& other) const { - if (other.shapes_ != this->shapes_) { - throw runtime_error("Shape mismatch"); - } + return result; + } - for (size_t i = 0; i < this->size(); i++) { - auto [a_offset, b_offset] = calculate_tensors_offsets(i, this->ndim(), this->strides_, other); + /// @brief Calculate the sum of all elements in the tensor + /// @return The sum of all elements in the tensor, regardless of the dimension + T sum() const + { + T sum = static_cast(0); - if ((*this->data_)[a_offset] != (*other.data_)[b_offset]) { - return false; - } - } - return true; + for (size_t i = 0; i < this->size(); i++) + { + sum += (*this->data_)[i]; } - /// @brief Reduce the tensor to the maximum value of all elements - /// @return a tensor with a single element, the maximum of all elements in the tensor - inline Tensor<> max() const { - return reduce_impl(ReduceOp::MAX); + return sum; + } + + /// @brief Check if all elements of two tensors are equal + /// @param other Tensor to compare + /// @return Tensor of integers where each element is 1 if the two tensors are equal at the same index, 0 otherwise + Tensor equal(const Tensor &other) const + { + if (other.shape_ != this->shape_) + { + throw runtime_error("Shape mismatch"); } + Tensor result(this->shape_, static_cast(0)); + const vector &result_strides = result.strides_; + + for (size_t i = 0; i < this->size(); i++) + { + auto [a_offset, b_offset] = calculate_tensors_offsets(i, this->ndim(), result_strides, other); - /// @brief Reduce the tensor to the indices of the maximum values along each row - /// @return a tensor with indices of the maximum values for each row - inline Tensor argmax() const { - return reduce_impl(ReduceOp::ARGMAX); + (*result.data_)[i] = (*this->data_)[a_offset] == (*other.data_)[b_offset]; } - /// @brief Reduce the tensor to the minimum value of all elements - /// @return a tensor with a single element, the minimum of all elements in the tensor - inline Tensor<> min() const { - return reduce_impl(ReduceOp::MIN); + return 
result.dtype(); + } + + /// @brief Check if all elements of two tensors are equal + /// @param other Tensor to compare + /// @return true if all elements are equal, false otherwise + bool compare(const Tensor &other) const + { + if (other.shape_ != this->shape_) + { + throw runtime_error("Shape mismatch"); } - /// @brief Reduce the tensor to the indices of the minimum values along each row - /// @return a tensor with indices of the minimum values for each row - inline Tensor argmin() const { - return reduce_impl(ReduceOp::ARGMIN); + for (size_t i = 0; i < this->size(); i++) + { + auto [a_offset, b_offset] = calculate_tensors_offsets(i, this->ndim(), this->strides_, other); + + if ((*this->data_)[a_offset] != (*other.data_)[b_offset]) + { + return false; + } } + return true; + } + + /// @brief Reduce the tensor to the maximum value of all elements + /// @return a tensor with a single element, the maximum of all elements in the tensor + inline Tensor<> max() const + { + return reduce_impl(ReduceOp::MAX); + } + + /// @brief Reduce the tensor to the indices of the maximum values along each row + /// @return a tensor with indices of the maximum values for each row + inline Tensor argmax() const + { + return reduce_impl(ReduceOp::ARGMAX); + } + + /// @brief Reduce the tensor to the minimum value of all elements + /// @return a tensor with a single element, the minimum of all elements in the tensor + inline Tensor<> min() const + { + return reduce_impl(ReduceOp::MIN); + } + + /// @brief Reduce the tensor to the indices of the minimum values along each row + /// @return a tensor with indices of the minimum values for each row + inline Tensor argmin() const + { + return reduce_impl(ReduceOp::ARGMIN); + } + + /// @brief Convert the tensor to a tensor of a different type. + /// @details If U is not provided, it defaults to double. + /// @param U the type to convert to + /// @return a tensor with the same shape and data, but with the type U + template + Tensor dtype() const + { + return dtype_impl(*this); + } + + /// @brief Reshape the tensor to the specified new shape. + /// @details This function changes the shape of the tensor without altering the data. + /// The total number of elements must remain the same; otherwise, an exception is thrown. + /// @param new_shape The desired shape for the tensor. + /// @throws runtime_error if the new shape is not compatible with the current number of elements. + Tensor<> reshape(const vector &new_shape) const + { + // Calculate total elements for both shapes + const int64_t current_elements = accumulate( + this->shape_.begin(), this->shape_.end(), 1, multiplies()); + const int64_t new_elements = accumulate( + new_shape.begin(), new_shape.end(), 1, multiplies()); + + if (current_elements != new_elements) + { + throw runtime_error("New shape must be compatible with the original shape"); + } + + // Check if the data is stored in a contiguous way + vector original_strides(this->ndim(), 0); + int64_t stride = 1; - - /// @brief Convert the tensor to a tensor of a different type. - /// @details If U is not provided, it defaults to double. - /// @param U the type to convert to - /// @return a tensor with the same shape and data, but with the type U - template - Tensor dtype() const { - return dtype_impl(*this); + for (int64_t i = this->ndim() - 1; i >= 0; --i) + { + original_strides[i] = stride; + stride *= this->shape_[i]; } - /// @brief Reshape the tensor to the specified new shape. 
- /// @details This function changes the shape of the tensor without altering the data. - /// The total number of elements must remain the same; otherwise, an exception is thrown. - /// @param new_shape The desired shape for the tensor. - /// @throws runtime_error if the new shape is not compatible with the current number of elements. - void reshape(const vector& new_shape) { - size_t new_size = 1; - for (const size_t& dim : new_shape) { - new_size *= dim; - } + Tensor result; - if (new_size != this->size()) { - throw runtime_error("New shape must be compatible with the original shape"); - } + // If the data is not stored in a contiguous way, the strides will not be a cumulative product of the shape + if (original_strides != this->strides_) + { + cout << "Clone the tensor" << endl; + /* + This part is a little bit complicated. - this->shapes_ = new_shape; - this->calculate_strides(); + Since the data may not be stored in a contiguous way, there is a problem when we directly change the shape of the tensor. + We would lose track of the strides of the tensor. - return; - } + Therefore we have to make the data contiguous first; then we can change the shape of the tensor. + The clone() function will create a new tensor with the same shape and data as the current tensor. - /// @brief Return a deep copy of the tensor. Actually the same as the copy constructor. - /// @details This function will create a new tensor with the same shape and data as the current tensor. - /// @return a new tensor which is a deep copy of the current tensor - Tensor clone() const { - Tensor result = *this; + If we directly used the copy constructor, the data would not be stored in a contiguous way, + since I don't rearrange the data in the copy constructor. - return result; - } + Eventually the tensor data is guaranteed to be stored in a contiguous way, so we can directly change the shape of the tensor. + */ - vector to_vector() const { return (*this->data_); } - - // Get the dimension of the tensor - inline size_t ndim() const { - return this->shapes_.size(); + // Create a new tensor with contiguous data + result = this->clone(); + } + else + { + // the data is already stored in a contiguous way + result = *this; } - const size_t size() const { - size_t n = 1; - for (const size_t& s : this->shapes_) { - n *= s; + result.shape_ = new_shape; + result.compute_contiguous_strides(); + + return result; + } + + /// @brief Return a deep copy of the tensor. The data is copied to a new contiguous storage (and this is the only difference from the copy constructor). + /// @details This function will create a new tensor with the same shape and data as the current tensor. + /// @return a new tensor which is a deep copy of the current tensor + Tensor clone() const + { + Tensor result; + + result.shape_ = this->shape_; + result.data_ = make_shared>(*(this->data_)); + result.compute_contiguous_strides(); + + // Copy data from original tensor's view to the new contiguous storage + for (size_t i = 0; i < this->size(); ++i) + { + vector indices = linear_to_multi_idxs(i, result.shape_); + size_t src_offset = this->offset_; + + for (size_t dim = 0; dim < indices.size(); ++dim) + { + src_offset += indices[dim] * this->strides_[dim]; } - /// @brief Print the tensor to console. - /// @details This function will print the tensor in a nested array style.
- void print() const { - print_recursive_impl(0, 0, 0); - cout << endl; // flush the output - return; + (*result.data_)[i] = (*this->data_)[src_offset]; } - inline const vector& shapes() const { return this->shapes_; } + return result; + } + static Tensor arange(size_t start, size_t end = 0, vector shape = {0}) + { + if (start == end) // if only one argument is provided + { + throw runtime_error("arange() missing required argument: 'end'"); + } + if (end == 0) + { + end = start; + start = 0; + } - // ========================================operators overloading======================================== - inline Tensor operator+(const Tensor& other) const { return this->add(other); } - inline Tensor operator-(const Tensor& other) const { return this->sub(other); } - inline Tensor operator*(const Tensor& other) const { return this->mul(other); } - inline Tensor operator*(const T& scaler) const { return this->mul(scaler); } - inline bool operator==(const Tensor& other) const { return this->compare(other); } + if (shape.size() == 1 && shape[0] <= 0) + { + shape[0] = end - start + 1; + } - Tensor& operator=(const Tensor& other) { - if (this == &other) return *this; + Tensor result(shape, static_cast(0)); - this->shapes_ = other.shapes_; - this->data_ = make_shared>(*(other.data_)); - this->calculate_strides(); + cout << "In arange, weight address: " << &result.data_ << endl; - // Copy data from original tensor's view to the new contiguous storage - for (size_t i = 0; i < this->size(); ++i) { - vector indices = linear_to_multi_idxs(i, this->shapes_); - size_t src_offset = other.offset_; + size_t idx = 0; + for (size_t i = start; i <= end; i++) + { + (*result.data_)[idx] = static_cast(i); + idx++; + } - for (size_t dim = 0; dim < indices.size(); ++dim) { - src_offset += indices[dim] * other.strides_[dim]; - } + return result; + } - (*this->data_)[i] = (*other.data_)[src_offset]; - } + // Get the dimension of the tensor + inline size_t ndim() const + { + return this->shape_.size(); + } - return *this; + const size_t size() const + { + if (this->offset_ == 0) + { + return this->data_->size(); } - const Tensor operator+=(const Tensor& other) { - *this = *this + other; - return *this; + if (this->size_ != -1) + { + return this->size_; } - const Tensor operator-=(const Tensor& other) { - *this = *this - other; - return *this; + this->size_ = 1; + for (const size_t &s : this->shape_) + { + this->size_ *= s; } - const Tensor operator*=(const Tensor& other) { - *this = *this * other; + return this->size_; + } + + /// @brief Print the tensor to console. + /// @details This function will print the tensor in a nested array style. 
+ void print() const + { + print_recursive_impl(0, 0, 0); + cout << endl; // flush the output + return; + } + + inline const vector &shapes() const { return this->shape_; } + + // ========================================operators overloading======================================== + inline Tensor operator+(const Tensor &other) const { return this->add(other); } + inline Tensor operator-(const Tensor &other) const { return this->sub(other); } + inline Tensor operator*(const Tensor &other) const { return this->mul(other); } + inline Tensor operator*(const T &scaler) const { return this->mul(scaler); } + inline Tensor operator/(const Tensor &other) const { return this->div(other); } + inline Tensor operator/(const T &scaler) const { return this->div(scaler); } + inline bool operator==(const Tensor &other) const { return this->compare(other); } + + /* + Instead of returning a new tensor, we modify the current tensor in place. + + Besides, it is slightly different from method clone(), in which it will not modify data_ to make all the elements stored contiguously. + */ + Tensor &operator=(const Tensor &other) + { + if (this == &other) return *this; - } - const Tensor operator*=(const T& other) { - *this = *this * other; - return *this; + this->shape_ = other.shape_; + this->data_ = make_shared>(*(other.data_)); + this->strides_ = other.strides_; + this->offset_ = other.offset_; + this->size_ = other.size_; + + return *this; + } + + const Tensor operator+=(const Tensor &other) + { + *this = *this + other; + return *this; + } + + const Tensor operator-=(const Tensor &other) + { + *this = *this - other; + return *this; + } + + const Tensor operator*=(const Tensor &other) + { + *this = *this * other; + return *this; + } + + const Tensor operator*=(const T &other) + { + *this = *this * other; + return *this; + } + + const Tensor operator/=(const Tensor &other) + { + *this = *this / other; + return *this; + } + + const Tensor operator/=(const T &other) + { + *this = *this / other; + return *this; + } + + // lvalue operator overloading + template + T &operator[](Indices... indices) + { + vector idxs = this->get_idxs(indices...); + return (*this->data_)[this->calculate_idx(idxs)]; + } + + // Using vector to index the tensor + T &operator[](const vector &indices) + { + return (*this->data_)[this->calculate_idx(indices)]; + } + + // rvalue operator overloading + template + const T &operator[](Indices... indices) const + { + vector idxs = this->get_idxs(indices...); + return (*this->data_)[this->calculate_idx(idxs)]; + } + + // Using vector to index the tensor + const T &operator[](const vector &indices) const + { + return (*this->data_)[this->calculate_idx(indices)]; + } + + /** + * @brief Advanced indexing using a combination of integers, strings, and slices. + * + * This function allows for flexible indexing into the tensor, similar to Python's + * advanced indexing. It supports integer indices, string-based slices, and the ellipsis + * ("...") for automatic dimension completion. The function expands slices and handles + * ellipsis to generate the appropriate sub-tensor. + * + * @param indices A vector of indices where each index can be an integer, a string + * representing a slice, or a special ellipsis ("..."). + * @return A new tensor that is indexed from the current tensor according to the given indices. + * + * @throw std::invalid_argument if an index type is invalid or if more than one ellipsis is used. 
+ */ + using IndexType = variant; + Tensor index(const vector &indices) const + { + vector> expanded_indices; + + // Handle ellipsis and expand slices + // cout << "Start expanding indices" << endl; + for (size_t i = 0; i < indices.size(); ++i) + { + const auto &idx = indices[i]; + + if (auto str_idx = get_if(&idx)) + { + Slice slice = Slice::parse(*str_idx); + expanded_indices.push_back(apply_slice(slice, this->shape_[i])); + } + else if (auto int_idx = get_if(&idx)) + { + expanded_indices.push_back({normalize_index(*int_idx, this->shape_[i])}); + } + else if (auto slice_idx = get_if(&idx)) + { + expanded_indices.push_back(apply_slice(*slice_idx, this->shape_[i])); + } + else + { + throw std::invalid_argument("Invalid index type"); + } } - // lvalue operator overloading - template - T& operator[](Indices... indices) { - vector idxs = this->get_idxs(indices...); - return (*this->data_)[this->calculate_idx(idxs)]; - } - - T& operator[](const vector& indices) { - return (*this->data_)[this->calculate_idx(indices)]; - } - - // rvalue operator overloading - template - const T& operator[](Indices... indices) const { - vector idxs = this->get_idxs(indices...); - return (*this->data_)[this->calculate_idx(idxs)]; - } - - const T& operator[](const vector& indices) const { - return (*this->data_)[this->calculate_idx(indices)]; - } - - /** - * @brief Advanced indexing using a combination of integers, strings, and slices. - * - * This function allows for flexible indexing into the tensor, similar to Python's - * advanced indexing. It supports integer indices, string-based slices, and the ellipsis - * ("...") for automatic dimension completion. The function expands slices and handles - * ellipsis to generate the appropriate sub-tensor. - * - * @param indices A vector of indices where each index can be an integer, a string - * representing a slice, or a special ellipsis ("..."). - * @return A new tensor that is indexed from the current tensor according to the given indices. - * - * @throw std::invalid_argument if an index type is invalid or if more than one ellipsis is used. 
- */ - using IndexType = variant; - Tensor index(const vector& indices) const { - vector> expanded_indices; - - // Handle ellipsis and expand slices - // cout << "Start expanding indices" << endl; - for (size_t i = 0; i < indices.size(); ++i) { - const auto& idx = indices[i]; - - if (auto str_idx = get_if(&idx)) { - Slice slice = Slice::parse(*str_idx); - expanded_indices.push_back(apply_slice(slice, this->shapes_[i])); - } - else if (auto int_idx = get_if(&idx)) { - expanded_indices.push_back({normalize_index(*int_idx, this->shapes_[i])}); - } - else if (auto slice_idx = get_if(&idx)) { - expanded_indices.push_back(apply_slice(*slice_idx, this->shapes_[i])); - } - else { - throw std::invalid_argument("Invalid index type"); + // Calculate new dimensions + vector new_dims; + for (const vector &expanded_idx : expanded_indices) + { + if (expanded_idx[0] != -1) + { // Not None/newaxis + if (expanded_idx.size() > 1) + { + new_dims.push_back(expanded_idx.size()); } } - - // Calculate new dimensions - vector new_dims; - for (const vector& expanded_idx : expanded_indices) { - if (expanded_idx[0] != -1) { // Not None/newaxis - if (expanded_idx.size() > 1) { - new_dims.push_back(expanded_idx.size()); - } - } - else { - new_dims.push_back(1); - } + else + { + new_dims.push_back(1); } + } - // cout << "Start printing new_dims" << endl; - // cout << "new_dims size: " << new_dims.size() << endl; - // for (size_t i = 0; i < new_dims.size(); ++i) { - // cout << new_dims[i] << " "; - // } - - // Create result tensor - Tensor result(new_dims, static_cast(0)); - - // Fill result tensor - vector current_indices(expanded_indices.size()); - vector result_indices; - - // Recursive lambda to fill result tensor - function fill_tensor = [&](size_t depth) { - if (depth == expanded_indices.size()) { - result_indices.clear(); - for (int i = 0; i < expanded_indices.size(); ++i) { - if (expanded_indices[i][0] != -1 && expanded_indices[i].size() > 1) { - result_indices.push_back(current_indices[i]); - } - } - - vector original_indices; - for (int i = 0; i < expanded_indices.size(); ++i) { - if (expanded_indices[i][0] != -1) { - original_indices.push_back(expanded_indices[i][current_indices[i]]); - } + // cout << "Start printing new_dims" << endl; + // cout << "new_dims size: " << new_dims.size() << endl; + // for (size_t i = 0; i < new_dims.size(); ++i) { + // cout << new_dims[i] << " "; + // } + + // Create result tensor + Tensor result(new_dims, static_cast(0)); + + // Fill result tensor + vector current_indices(expanded_indices.size()); + vector result_indices; + + // Recursive lambda to fill result tensor + function fill_tensor = [&](size_t depth) + { + if (depth == expanded_indices.size()) + { + result_indices.clear(); + for (int i = 0; i < expanded_indices.size(); ++i) + { + if (expanded_indices[i][0] != -1 && expanded_indices[i].size() > 1) + { + result_indices.push_back(current_indices[i]); } - - result[result_indices] = (*this)[original_indices]; - return; } - - for (int i = 0; i < expanded_indices[depth].size(); ++i) { - current_indices[depth] = i; - fill_tensor(depth + 1); + + vector original_indices; + for (int i = 0; i < expanded_indices.size(); ++i) + { + if (expanded_indices[i][0] != -1) + { + original_indices.push_back(expanded_indices[i][current_indices[i]]); + } } - }; - - fill_tensor(0); - return result; - } + + result[result_indices] = (*this)[original_indices]; + return; + } + + for (int i = 0; i < expanded_indices[depth].size(); ++i) + { + current_indices[depth] = i; + fill_tensor(depth + 1); + } + 
}; + + fill_tensor(0); + return result; + } }; \ No newline at end of file diff --git a/include/modules/activations/softmax.hpp b/include/modules/activations/softmax.hpp index 96542cc..9447211 100644 --- a/include/modules/activations/softmax.hpp +++ b/include/modules/activations/softmax.hpp @@ -10,6 +10,7 @@ class Softmax : public Module { Tensor<> softmax_helper(const Tensor<>& input); vector softmax_helper(const vector& input); public: + Softmax(); Tensor<> forward(const Tensor<>& input); Tensor<> backward(const Tensor<>& grad_output); const Tensor<>& get_softmax_input_cache() const { return this->softmax_input_cache_; } diff --git a/include/modules/layers/conv2d.hpp b/include/modules/layers/conv2d.hpp index 2d68d66..1868be9 100644 --- a/include/modules/layers/conv2d.hpp +++ b/include/modules/layers/conv2d.hpp @@ -1,30 +1,50 @@ +#pragma once +#include #include "module.hpp" +#include "conv2d_utils.hpp" using namespace nn; -namespace nn { +namespace nn +{ -class Conv2d : public Module { + class Conv2d : public Module + { public: - Conv2d(int in_channels, int out_channels, int kernel_size, int stride = 1, int padding = 0, int dilation = 1, bool bias = true); - virtual Tensor<> forward(const Tensor<>& input) override; - virtual Tensor<> backward(const Tensor<>& grad_output) override; + Conv2d(size_t in_channels, + size_t out_channels, + var_pair kernel_size, + var_pair stride = (size_t)1, + var_pair padding = (size_t)0, + var_pair dilation = (size_t)1, + const string &padding_mode = "zeros", + bool bias = true); + + virtual Tensor<> forward(const Tensor<> &input) override; + virtual Tensor<> backward(const Tensor<> &grad_output) override; virtual void update_params(const float lr) override; - Tensor<> convolution(const Tensor<>& input, const Tensor<> filter); - Tensor<> full_convolution(const Tensor<>& input, const Tensor<> filter); - - private: - int in_channels_; - int out_channels_; - int kernel_size_; - int stride_; - int padding_; - int dilation_; - bool bias_; - Tensor<> weights_; - Tensor<> biases_; - Tensor<> grad_weights_; - Tensor<> grad_biases_; -}; + void reset_parameters(); + + void set_weight(const Tensor<> &target_weight) { this->weight_ = target_weight; } + void set_bias(const Tensor<> &target_bias) { this->bias_ = target_bias; } + const Tensor<> &get_weight() const { return this->weight_; } + const Tensor<> &get_bias() const { return this->bias_; } + + private: + size_t in_channels_; + size_t out_channels_; + size_tp2 kernel_size_; + size_tp2 stride_; + size_tp2 padding_; + size_tp2 dilation_; + bool use_bias_; + PaddingMode padding_mode_; + Padding padding_module_; + vector original_input_shape_; + Tensor<> weight_; + Tensor<> bias_; + Tensor<> grad_weight_; + Tensor<> grad_bias_; + }; } \ No newline at end of file diff --git a/include/modules/layers/flatten.hpp b/include/modules/layers/flatten.hpp new file mode 100644 index 0000000..3a06f7a --- /dev/null +++ b/include/modules/layers/flatten.hpp @@ -0,0 +1,22 @@ +#pragma once +#include "module.hpp" + +namespace nn +{ + + class Flatten : public Module + { + public: + Flatten(int64_t start_dim = 1, int64_t end_dim = -1); + + virtual Tensor<> forward(const Tensor<> &input) override; + virtual Tensor<> backward(const Tensor<> &grad_output) override; + virtual void update_params(const float lr) override; + + private: + int64_t start_dim_; + int64_t end_dim_; + vector original_input_shape_; + }; + +} \ No newline at end of file diff --git a/include/modules/layers/linear.hpp b/include/modules/layers/linear.hpp index 
5265488..036ed50 100644 --- a/include/modules/layers/linear.hpp +++ b/include/modules/layers/linear.hpp @@ -1,34 +1,36 @@ #pragma once #include "module.hpp" -namespace nn { +namespace nn +{ -class Linear : public Module{ + class Linear : public Module + { public: Linear(size_t in_features, size_t out_features, bool bias); - - virtual Tensor<> forward(const Tensor<>& input) override; - virtual Tensor<> backward(const Tensor<>& grad_output) override; + + virtual Tensor<> forward(const Tensor<> &input) override; + virtual Tensor<> backward(const Tensor<> &grad_output) override; virtual void update_params(const float lr) override; - void randomizeParams(); + void reset_parameters(); // setters - inline void set_weights(const Tensor<>& desiredWeights) { this->weights_ = desiredWeights; }; - inline void set_biases(const Tensor<>& desiredBiases) { this->biases_ = desiredBiases; } + inline void set_weight(const Tensor<> &target_weight) { this->weight_ = target_weight; }; + inline void set_bias(const Tensor<> &target_bias) { this->bias_ = target_bias; } // getters - inline const Tensor<>& getWeights() const { return this->weights_; } - inline const Tensor<>& getBiases() const { return this->biases_; } + inline const Tensor<> &get_weight() const { return this->weight_; } + inline const Tensor<> &get_bias() const { return this->bias_; } private: size_t in_features_; size_t out_features_; - bool bias_; - Tensor<> weights_; - Tensor<> biases_; - Tensor<> grad_weights_; - Tensor<> grad_biases_; - }; + bool use_bias_; + Tensor<> weight_; + Tensor<> bias_; + Tensor<> grad_weight_; + Tensor<> grad_bias_; + }; } \ No newline at end of file diff --git a/include/utils/conv2d_utils.hpp b/include/utils/conv2d_utils.hpp index 40e3f05..a53b9bd 100644 --- a/include/utils/conv2d_utils.hpp +++ b/include/utils/conv2d_utils.hpp @@ -1,4 +1,34 @@ #include "tensor.hpp" using namespace std; -Tensor<> rotate_kernel(const Tensor<>& kernel); +using size_tp2 = std::pair; +using var_pair = std::variant; + +enum class PaddingMode +{ + ZEROS, + REFLECT, + REPLICATE +}; + +class Padding +{ +public: + Padding() = default; + Padding(size_tp2 padding, PaddingMode padding_mode) : padding_(padding), padding_mode_(padding_mode) {} + Tensor<> pad(const Tensor<> &input, const size_tp2 &padding) const; + Tensor<> zero_pad(const Tensor<> &input, const size_tp2 &padding) const; + +private: + size_tp2 padding_; + PaddingMode padding_mode_; +}; + +Tensor<> +convolution(const size_tp2 &stride, const size_tp2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias); + +const vector calculate_output_shape(const vector &input_shape, const int64_t out_channel, const size_tp2 &kernel_size, const size_tp2 &stride, const size_tp2 &padding, const size_tp2 &dilation); + +Tensor<> flip_vertical_and_horizontal(const Tensor<> &input); + +Tensor<> dilate_input(const Tensor<> &input, const size_tp2 &dilation); \ No newline at end of file diff --git a/include/utils/tensor_utils.hpp b/include/utils/tensor_utils.hpp index efa2b35..94681e1 100644 --- a/include/utils/tensor_utils.hpp +++ b/include/utils/tensor_utils.hpp @@ -1,6 +1,8 @@ #pragma once #include +#include #include +#include #include #include #include @@ -10,7 +12,8 @@ #include #include #include - +#include +#include using namespace std; @@ -20,11 +23,12 @@ template class Tensor; // Convert tensor to different data type -template -Tensor dtype_impl(const Tensor& tensor); +template +Tensor dtype_impl(const Tensor &tensor); // for max, 
min ,argmax, argmin reduction -enum class ReduceOp { +enum class ReduceOp +{ MAX, MIN, ARGMAX, @@ -32,7 +36,8 @@ enum class ReduceOp { }; // for add, subtract, multiply, divide -enum class ArithmeticOp { +enum class ArithmeticOp +{ ADD, SUB, MUL, @@ -40,50 +45,65 @@ enum class ArithmeticOp { }; // Slice struct to handle Python-like slicing -struct Slice { +struct Slice +{ int start; int stop; int step; - + Slice(int start_ = 0, int stop_ = -1, int step_ = 1) : start(start_), stop(stop_), step(step_) {} - - static Slice parse(const string& slice_str); + + static Slice parse(const string &slice_str); }; // Helper function to convert negative indices to positive size_t normalize_index(int idx, size_t dim_size); // Helper function to apply slice to a dimension -vector apply_slice(const Slice& slice, size_t dim_size); +vector apply_slice(const Slice &slice, size_t dim_size); -vector linear_to_multi_idxs(size_t idx, const vector& shape); +// Helper function to calculate the offset of the tensor given a single index +vector linear_to_multi_idxs(size_t idx, const vector &shape); // Type trait to check if a type is a std::vector -template -struct is_vector : public std::false_type {}; +template +struct is_vector : public std::false_type +{ +}; -template -struct is_vector> : public std::true_type {}; +template +struct is_vector> : public std::true_type +{ +}; // Type trait to check if a type is a std::vector -template -struct is_initializer_list : public std::false_type {}; +template +struct is_initializer_list : public std::false_type +{ +}; -template -struct is_initializer_list> : public std::true_type {}; +template +struct is_initializer_list> : public std::true_type +{ +}; // ================================================definition================================================ -template -Tensor dtype_impl(const Tensor& tensor) { +template +Tensor dtype_impl(const Tensor &tensor) +{ Tensor result; - result.shapes_ = tensor.shapes_; + + result.shape_ = tensor.shape_; + result.data_ = make_shared>(); result.data_->resize(tensor.data_->size()); - + result.strides_ = tensor.strides_; + result.offset_ = tensor.offset_; + result.size_ = tensor.size_; + std::transform(tensor.data_->begin(), tensor.data_->end(), result.data_->begin(), - [](const U& val) { return static_cast(val); }); - - result.calculate_strides(); - + [](const U &val) + { return static_cast(val); }); + return result; } \ No newline at end of file diff --git a/include/utils/utils.hpp b/include/utils/utils.hpp index 644fb92..efdd871 100644 --- a/include/utils/utils.hpp +++ b/include/utils/utils.hpp @@ -1,6 +1,7 @@ +#pragma once #include #include using namespace std; -void print_training_stats(int batch, float loss, float accuracy); -void print_training_stats_line(int batch, float loss, float accuracy); \ No newline at end of file +void print_stats(int batch, float loss, float accuracy); +void print_stats_line(int batch, float loss, float accuracy); \ No newline at end of file diff --git a/src/datasets/mnist.cpp b/src/datasets/mnist.cpp index add5f07..bb8b178 100644 --- a/src/datasets/mnist.cpp +++ b/src/datasets/mnist.cpp @@ -114,9 +114,7 @@ bool MNIST::read_labels(const string& path) { tuple, Tensor<>> Batch::to_tensor() { Tensor<> data = this->batch_data; + Tensor<> labels = this->batch_labels; - Tensor labels_int = this->batch_labels; - Tensor<> labels = labels_int.dtype(); - return make_tuple(data, labels); } \ No newline at end of file diff --git a/src/models/mlp.cpp b/src/models/mlp.cpp index 8a5fda4..f1d0700 100644 --- 
a/src/models/mlp.cpp +++ b/src/models/mlp.cpp @@ -3,12 +3,15 @@ #include "relu.hpp" #include "dropout.hpp" -MLP::MLP(vector layer_sizes, double dropout_p) { +MLP::MLP(vector layer_sizes, double dropout_p) +{ this->num_layers_ = layer_sizes.size(); - for (size_t i = 0; i < this->num_layers_ - 1; i++) { + for (size_t i = 0; i < this->num_layers_ - 1; i++) + { this->layers_.push_back(new Linear(layer_sizes[i], layer_sizes[i + 1], true)); - if (i < this->num_layers_ - 2) { + if (i < this->num_layers_ - 2) + { this->layers_.push_back(new ReLU()); this->layers_.push_back(new Dropout(dropout_p)); } @@ -17,34 +20,42 @@ MLP::MLP(vector layer_sizes, double dropout_p) { MLP::MLP(initializer_list layer_sizes, double dropout_p) : MLP(vector(layer_sizes), dropout_p) {} -MLP::~MLP() { - for (Module* layer : this->layers_) { +MLP::~MLP() +{ + for (Module *layer : this->layers_) + { delete layer; } } -Tensor<> MLP::forward(const Tensor<>& input) { +Tensor<> MLP::forward(const Tensor<> &input) +{ Tensor<> x = input; - for (Module* layer : this->layers_) { + for (Module *layer : this->layers_) + { x = layer->forward(x); } return x; } -Tensor<> MLP::backward(const Tensor<>& grad_output) { +Tensor<> MLP::backward(const Tensor<> &grad_output) +{ Tensor<> grad = grad_output; - for (int i = this->layers_.size() - 1; i >= 0; i--) { + for (int i = this->layers_.size() - 1; i >= 0; i--) + { grad = this->layers_[i]->backward(grad); } return grad; } -void MLP::update_params(const float lr) { - for (Module* layer : this->layers_) { +void MLP::update_params(const float lr) +{ + for (Module *layer : this->layers_) + { layer->update_params(lr); } diff --git a/src/modules/activations/softmax.cpp b/src/modules/activations/softmax.cpp index 2e2be8d..6fe51c2 100644 --- a/src/modules/activations/softmax.cpp +++ b/src/modules/activations/softmax.cpp @@ -2,21 +2,32 @@ #include "softmax.hpp" using namespace nn; -Tensor<> Softmax::softmax_helper(const Tensor<>& input) { - Tensor<> result = input.map([](double x) { return exp(x); }); +Softmax::Softmax() +{ + cout << "Starting Softmax" << endl; + cout << "Softmax initialized" << endl; +} + +Tensor<> Softmax::softmax_helper(const Tensor<> &input) +{ + Tensor<> result = input.map([](double x) + { return exp(x); }); double sum = result.sum(); return result * (1 / sum); } -vector Softmax::softmax_helper(const vector& input) { +vector Softmax::softmax_helper(const vector &input) +{ double sum = 0.0f; vector result; - for (size_t i = 0; i < input.size(); i++) { + for (size_t i = 0; i < input.size(); i++) + { sum += exp(input[i]); } - for (size_t i = 0; i < input.size(); i++) { + for (size_t i = 0; i < input.size(); i++) + { result.push_back(exp(input[i]) / sum); } @@ -24,21 +35,32 @@ vector Softmax::softmax_helper(const vector& input) { } // Only support 1D and 2D Tensors -Tensor<> Softmax::forward(const Tensor<>& input) { +Tensor<> Softmax::forward(const Tensor<> &input) +{ // In softmax case, we don't have to store the input as it is not used in the backward pass // Instead, we store the softmax(input) - if (input.ndim() == 1) { + if (input.ndim() == 1) + { return this->softmax_helper(input); } + // const size_t leading_ndim = input.ndim() - 2; + + // vector leading_shape(input.shapes().begin(), input.shapes().end() - 2); + + // const size_t n = input.shapes()[leading_ndim]; + // const size_t m = input.shapes()[leading_ndim + 1]; + vector> softmax_input; - for (size_t i = 0; i < input.shapes()[0]; i++) { + for (size_t i = 0; i < input.shapes()[0]; i++) + { vector input_row; 
input_row.reserve(input.shapes()[1]); - for (size_t j = 0; j < input.shapes()[1]; j++) { + for (size_t j = 0; j < input.shapes()[1]; j++) + { input_row.push_back(input[i, j]); } softmax_input.push_back(this->softmax_helper(input_row)); @@ -49,7 +71,8 @@ Tensor<> Softmax::forward(const Tensor<>& input) { return this->softmax_input_cache_; } -Tensor<> Softmax::backward(const Tensor<>& grad_output) { +Tensor<> Softmax::backward(const Tensor<> &grad_output) +{ Tensor<> softmax_grad; return softmax_grad; diff --git a/src/modules/layers/conv2d.cpp b/src/modules/layers/conv2d.cpp index d1d44d0..5e7a87d 100644 --- a/src/modules/layers/conv2d.cpp +++ b/src/modules/layers/conv2d.cpp @@ -1,12 +1,235 @@ +#include +#include #include "conv2d.hpp" using namespace nn; -Conv2d::Conv2d(int in_channels, int out_channels, int kernel_size, int stride, int padding, int dilation, bool bias) { +Conv2d::Conv2d(size_t in_channels, + size_t out_channels, + var_pair kernel_size, + var_pair stride, + var_pair padding, + var_pair dilation, + const string &padding_mode, + bool bias) +{ this->in_channels_ = in_channels; this->out_channels_ = out_channels; - this->kernel_size_ = kernel_size; - this->stride_ = stride; - this->padding_ = padding; - this->dilation_ = dilation; - this->bias_ = bias; + this->use_bias_ = bias; + + // Helper lambda to process variant parameters + auto process_variant = [](auto &&arg) -> size_tp2 + { + using T = std::decay_t; + if constexpr (std::is_same_v) + { + if (arg < 0) + { + throw std::invalid_argument("Negative kernel size, stride, padding, or dilation is not supported"); + } + return {arg, arg}; + } + else + { + static_assert(std::is_same_v, "Unexpected type in variant"); + return arg; + } + }; + + // Set kernel size, stride, padding, and dilation + this->kernel_size_ = std::visit(process_variant, kernel_size); + this->stride_ = std::visit(process_variant, stride); + this->padding_ = std::visit(process_variant, padding); + this->dilation_ = std::visit(process_variant, dilation); + + // cout << "Kernel Size : " << this->kernel_size_.first << ", " << this->kernel_size_.second << endl; + // cout << "Stride : " << this->stride_.first << ", " << this->stride_.second << endl; + // cout << "Padding : " << this->padding_.first << ", " << this->padding_.second << endl; + // cout << "Dilation : " << this->dilation_.first << ", " << this->dilation_.second << endl; + + // Check if padding mode is valid + unordered_map all_padding_modes = {{"zeros", PaddingMode::ZEROS}, {"reflect", PaddingMode::REFLECT}, {"replicate", PaddingMode::REPLICATE}}; + + if (all_padding_modes.find(padding_mode) == all_padding_modes.end()) + { + throw std::invalid_argument("Padding mode must be one of 'zeros', 'reflect', or 'replicate'"); + } + + // Set padding mode + this->padding_mode_ = all_padding_modes[padding_mode]; + this->padding_module_ = Padding(this->padding_, this->padding_mode_); + + // Initialize weights and bias + vector weight_shape = {this->out_channels_, this->in_channels_, this->kernel_size_.first, this->kernel_size_.second}; + + this->weight_ = Tensor<>(weight_shape, 0.0); + + if (this->use_bias_) + { + vector bias_shape = {this->out_channels_}; + this->bias_ = Tensor<>(bias_shape, 0.0); + } + + // randomize the weights and bias based on PyTorch implementation + this->reset_parameters(); +} + +Tensor<> Conv2d::forward(const Tensor<> &input) +{ + Tensor<> input_data = input; + this->original_input_shape_ = input.shapes(); + + const vector &output_shape = calculate_output_shape(input.shapes(),
this->out_channels_, this->kernel_size_, this->stride_, this->padding_, this->dilation_); + + if (this->padding_.first > 0 && this->padding_.second > 0) + { + input_data = this->padding_module_.pad(input_data, this->padding_); + } + + // this input is the padded version of the original input + this->input_cache_ = input_data; + + return convolution(this->stride_, this->dilation_, output_shape, input_data, this->weight_, this->bias_, this->use_bias_); +} + +Tensor<> Conv2d::backward(const Tensor<> &grad_output) +{ + // dL_dY = grad_output + + // dL_dW = conv(input_data, dL_dY) + Tensor<> permuted_input = this->input_cache_.permute(1, 0, 2, 3); + Tensor<> permuted_grad_output = grad_output.permute(1, 0, 2, 3); + + // The grad weight shape is initially permuted + const vector permuted_grad_weight_shape = {this->in_channels_, this->out_channels_, this->kernel_size_.first, this->kernel_size_.second}; + + this->grad_weight_ = convolution(this->dilation_, this->stride_, permuted_grad_weight_shape, permuted_input, permuted_grad_output, Tensor<>(), false); + + cout << "grad_weight: " << endl; + this->grad_weight_.print(); + cout << endl; + + // The grad weight shape is permuted back to the original shape + this->grad_weight_ = this->grad_weight_.permute(1, 0, 2, 3); + + // dL_dB = sum(dL_dY, dims=(0, 2, 3)) + if (this->use_bias_) + { + this->grad_bias_ = Tensor<>({this->out_channels_}, 0.0); + for (size_t i = 0; i < grad_output.shapes()[0]; i++) + { + for (size_t j = 0; j < grad_output.shapes()[1]; j++) + { + for (size_t k = 0; k < grad_output.shapes()[2]; k++) + { + for (size_t l = 0; l < grad_output.shapes()[3]; l++) + { + this->grad_bias_[j] += grad_output[i, j, k, l]; + } + } + } + } + + cout << "grad_bias: " << endl; + this->grad_bias_.print(); + cout << endl; + } + + // dL_dX = fullconv(dL_dY, W) + Tensor<> flipped_weight = flip_vertical_and_horizontal(this->weight_); + cout << "flipped_weight: " << endl; + flipped_weight.print(); + cout << endl; + + + Tensor<> permuted_flipped_weight = flipped_weight.permute(1, 0, 2, 3); + + cout << "permuted_flipped_weight: " << endl; + permuted_flipped_weight.print(); + cout << endl; + + Tensor<> copy_grad_output = grad_output; + + if (this->stride_.first > 1 || this->stride_.second > 1) + { + copy_grad_output = dilate_input(copy_grad_output, this->stride_); + } + + const size_t H_further_pad = (this->kernel_size_.first - 1) * this->dilation_.first - this->padding_.first; + const size_t W_further_pad = (this->kernel_size_.second - 1) * this->dilation_.second - this->padding_.second; + + if (H_further_pad > 0 && W_further_pad > 0) + { + copy_grad_output = this->padding_module_.pad(copy_grad_output, {H_further_pad, W_further_pad}); + } + else if (H_further_pad < 0 && W_further_pad < 0) + { + permuted_flipped_weight = this->padding_module_.pad(permuted_flipped_weight, {-H_further_pad, -W_further_pad}); + } + else + { + throw std::invalid_argument("The further padding for dL/dX is not correct"); + } + + Tensor<> grad_input = convolution({1, 1}, this->dilation_, this->original_input_shape_, copy_grad_output, permuted_flipped_weight, Tensor<>(), false); + + return grad_input; +} + +void Conv2d::update_params(const float lr) +{ + this->weight_ -= this->grad_weight_ * lr; + + if (this->use_bias_) + { + this->bias_ -= this->grad_bias_ * lr; + } + + return; +} + +void Conv2d::reset_parameters() +{ + /* + PyTorch implementation: + + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. 
/ math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + */ + + size_t n = this->in_channels_; + n *= this->kernel_size_.first * this->kernel_size_.second; + + const double stdv = 1.0 / sqrt(n); + + // Set up the random number generator + random_device rd; + mt19937 gen(rd()); + uniform_real_distribution dis(-stdv, stdv); + + for (size_t i = 0; i < this->out_channels_; i++) + { + for (size_t j = 0; j < this->in_channels_; j++) + { + for (size_t k = 0; k < this->kernel_size_.first; k++) + { + for (size_t l = 0; l < this->kernel_size_.second; l++) + { + this->weight_[i, j, k, l] = dis(gen); + } + } + } + } + + if (this->use_bias_) + { + for (size_t i = 0; i < this->out_channels_; i++) + { + this->bias_[i] = dis(gen); + } + } } diff --git a/src/modules/layers/flatten.cpp b/src/modules/layers/flatten.cpp new file mode 100644 index 0000000..2c06798 --- /dev/null +++ b/src/modules/layers/flatten.cpp @@ -0,0 +1,25 @@ +#include "flatten.hpp" +using namespace nn; + +Flatten::Flatten(int64_t start_dim, int64_t end_dim) : start_dim_(start_dim), end_dim_(end_dim) +{ + cout << "Flatten layer initialized with start_dim = " << start_dim << " and end_dim = " << end_dim << endl; +} + +Tensor<> Flatten::forward(const Tensor<> &input) +{ + this->original_input_shape_ = input.shapes(); + + return input.flatten(this->start_dim_, this->end_dim_); +} + +Tensor<> Flatten::backward(const Tensor<> &grad_output) +{ + return grad_output.reshape(this->original_input_shape_); +} + +void Flatten::update_params(const float lr) +{ + // we don't need to update any parameters + return; +} diff --git a/src/modules/layers/linear.cpp b/src/modules/layers/linear.cpp index c1b01b0..fa1ee40 100644 --- a/src/modules/layers/linear.cpp +++ b/src/modules/layers/linear.cpp @@ -3,94 +3,117 @@ #include "linear.hpp" using namespace nn; -Linear::Linear(size_t in_features, size_t out_features, bool bias) : in_features_(in_features), out_features_(out_features), bias_(bias) { - this->weights_ = Tensor<>({in_features, out_features}, 0.0f); - - if (bias) { - this->biases_ = Tensor<>({out_features, 1}, 0.0f); - } - else { - this->biases_ = Tensor<>(); - } +Linear::Linear(size_t in_features, size_t out_features, bool bias) : in_features_(in_features), out_features_(out_features), use_bias_(bias) +{ + this->weight_ = Tensor<>({in_features, out_features}, 0.0f); - // randomize the weights. The bias is originally 0. 
- this->randomizeParams(); + if (this->use_bias_) + { + this->bias_ = Tensor<>({out_features, 1}, 0.0f); + } - this->grad_weights_ = Tensor<>({in_features, out_features}, 0.0f);; - this->grad_biases_ = Tensor<>({out_features, 1}, 0.0f); + // randomize the weights and bias based on PyTorch implementation + this->reset_parameters(); - this->input_cache_ = Tensor<>(); + cout << "Linear layer initialized with in_features = " << in_features << " and out_features = " << out_features << endl; + cout << &this->input_cache_ << endl; } -Tensor<> Linear::forward(const Tensor<>& input) { +Tensor<> Linear::forward(const Tensor<> &input) +{ this->input_cache_ = input; size_t batchSize = input.shapes()[0]; - const Tensor<>& XW = input.matmul(this->weights_); + const Tensor<> &XW = input.matmul(this->weight_); - if (!this->bias_) { + if (!this->use_bias_) + { return XW; } Tensor<> biases_repeated = Tensor<>({batchSize, this->out_features_}, 0.0f); - for (size_t i = 0; i < batchSize; i++) { - for (size_t j = 0; j < this->out_features_; j++) { - biases_repeated[i, j] = this->biases_[j, 0]; + for (size_t i = 0; i < batchSize; i++) + { + for (size_t j = 0; j < this->out_features_; j++) + { + biases_repeated[i, j] = this->bias_[j, 0]; } } return XW + biases_repeated; } -Tensor<> Linear::backward(const Tensor<>& grad_output) { +Tensor<> Linear::backward(const Tensor<> &grad_output) +{ // dL/dY = grad_output // dL/dW = X^T * dL/dY - this->grad_weights_ = this->input_cache_.transpose().matmul(grad_output); + this->grad_weight_ = this->input_cache_.transpose().matmul(grad_output); // cout << endl << "dL/dW: " << endl; - // this->grad_weights_.print(); + // this->grad_weight_.print(); // cout << endl; // dL/dX = dL/dY * W^T - Tensor<> grad_input = grad_output.matmul(this->weights_.transpose()); + Tensor<> grad_input = grad_output.matmul(this->weight_.transpose()); /* dL/db = dL/dY^T * 1_B (1_B is a vector of ones of size batchSize) dL/db = dL/dY.sum(axis=0) */ - if (this->bias_) - this->grad_biases_ = grad_output.transpose().matmul(Tensor<>({grad_output.shapes()[0], 1}, 1.0f)); + if (this->use_bias_) + this->grad_bias_ = grad_output.transpose().matmul(Tensor<>({grad_output.shapes()[0], 1}, 1.0f)); // cout << endl << "dL/db: " << endl; - // this->grad_biases_.print(); + // this->grad_bias_.print(); // cout << endl; return grad_input; } -void Linear::update_params(const float lr) { +void Linear::update_params(const float lr) +{ - this->weights_ -= this->grad_weights_ * lr; - this->biases_ -= this->grad_biases_ * lr; + this->weight_ -= this->grad_weight_ * lr; + this->bias_ -= this->grad_bias_ * lr; return; } -void Linear::randomizeParams() { +void Linear::reset_parameters() +{ + /* + PyTorch implementation: + + stdv = 1. 
/ math.sqrt(self.weight.size(1)) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + + */ // Calculate the limit for the uniform distribution - double limit = sqrt(6.0f / (this->in_features_ + this->out_features_)); + const double stdv = 1.0 / sqrt(this->weight_.shapes()[0]); // since the weight is transposed // Set up the random number generator random_device rd; mt19937 gen(rd()); - uniform_real_distribution dis(-limit, limit); + uniform_real_distribution dis(-stdv, stdv); // Xavier initialization - for (size_t i = 0; i < this->in_features_; i++) { - for (size_t j = 0; j < this->out_features_; j++) { - this->weights_[i, j] = dis(gen); + for (size_t i = 0; i < this->in_features_; i++) + { + for (size_t j = 0; j < this->out_features_; j++) + { + this->weight_[i, j] = dis(gen); + } + } + + if (this->use_bias_) + { + for (size_t i = 0; i < this->out_features_; i++) + { + this->bias_[i, 0] = dis(gen); } } } \ No newline at end of file diff --git a/src/modules/losses/cross_entropy.cpp b/src/modules/losses/cross_entropy.cpp index 002b585..b20ed72 100644 --- a/src/modules/losses/cross_entropy.cpp +++ b/src/modules/losses/cross_entropy.cpp @@ -3,13 +3,16 @@ #include using namespace nn; -CrossEntropyLoss::CrossEntropyLoss() { - this->softmax_ = Softmax(); +CrossEntropyLoss::CrossEntropyLoss() +{ + cout << "Starting CrossEntropyLoss" << endl; + cout << "CrossEntropyLoss initialized" << endl; } -double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { +double CrossEntropyLoss::forward(const Tensor<> &Y_hat, const Tensor<> &Y) +{ /* - L = 1 / B \sum_{i=1}^B \sum_{j=1}^M Y_{ij} * log(softmax(Y_hat_{ij)}) + L = 1 / B \sum_{i=1}^B \sum_{j=1}^M Y_{ij} * log(softmax(Y_hat_{ij})) R^B x M, Y R^B x M @@ -18,14 +21,17 @@ double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { // We don't have to store the Y_hat as it is not used in the backward pass. Instead, we store the softmax(Y_hat) // Note that this->Y_cache_ is just a vector with label, and it is not a matrix with one-hot vectors. - if (Y.ndim() == 2) { + if (Y.ndim() == 2) + { // In this case, we assume Y is a matrix of one-hot vectors. 
So we can just store the index of the correct label this->Y_cache_ = Y.argmax().dtype(); } - else if (Y.ndim() == 1) { + else if (Y.ndim() == 1) + { this->Y_cache_ = Y; } - else { + else + { throw std::runtime_error("Currently, Cross Entropy Loss does not support label with more than 2 dimensions."); } @@ -40,7 +46,8 @@ double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { // sum up all the elements double loss_without_factor = 0.0f; - for (int i = 0; i < B; ++i) { + for (int i = 0; i < B; ++i) + { // Y_{ij} * log(softmax(Y_hat_{ij})) loss_without_factor += log(softmax_Y_hat[i, static_cast(this->Y_cache_[i])]); } @@ -48,7 +55,8 @@ double CrossEntropyLoss::forward(const Tensor<>& Y_hat, const Tensor<>& Y) { return loss_without_factor * factor; } -Tensor<> CrossEntropyLoss::backward() { +Tensor<> CrossEntropyLoss::backward() +{ /* dL/dY_hat should have the same shape as Y_hat @@ -68,7 +76,8 @@ Tensor<> CrossEntropyLoss::backward() { Since Y is a matrix of one-hot vectors, only the correct label is 1 and the rest are 0 */ - for (int i = 0; i < B; ++i) { + for (int i = 0; i < B; ++i) + { grad_output[i, static_cast(this->Y_cache_[i])] -= 1.0f; } diff --git a/src/utils/conv2d_utils.cpp b/src/utils/conv2d_utils.cpp new file mode 100644 index 0000000..457e971 --- /dev/null +++ b/src/utils/conv2d_utils.cpp @@ -0,0 +1,260 @@ +#include "conv2d_utils.hpp" + +Tensor<> Padding::pad(const Tensor<> &input, const size_tp2 &padding) const +{ + switch (this->padding_mode_) + { + case PaddingMode::ZEROS: + return this->zero_pad(input, padding); + break; + default: + throw std::invalid_argument("Invalid padding mode"); + } +} + +Tensor<> Padding::zero_pad(const Tensor<> &input, const size_tp2 &padding) const +{ + const vector &input_shape = input.shapes(); + + if (input_shape.size() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + + const size_t B = input_shape[0]; + const size_t C = input_shape[1]; + const size_t H = input_shape[2]; + const size_t W = input_shape[3]; + + const size_t padded_H = H + padding.first * 2; + const size_t padded_W = W + padding.second * 2; + + Tensor<> padded_output({B, C, padded_H, padded_W}, 0.0); + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C; ++c) + { + for (size_t h = 0; h < H; ++h) + { + for (size_t w = 0; w < W; ++w) + { + padded_output[b, c, h + padding.first, w + padding.second] = input[b, c, h, w]; + } + } + } + } + + return padded_output; +} + +Tensor<> convolution(const size_tp2 &stride, const size_tp2 &dilation, const vector &output_shape, const Tensor<> &input, const Tensor<> &kernel, const Tensor<> &bias, bool use_bias) +{ + const vector &input_shape = input.shapes(); + const vector &kernel_shape = kernel.shapes(); + + if (output_shape.size() != 4) + { + throw std::invalid_argument("Output shape must be 4D"); + } + if (input_shape.size() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + if (kernel_shape.size() != 4) + { + throw std::invalid_argument("Kernel shape must be 4D"); + } + + const size_t B = output_shape[0]; + const size_t C_out = output_shape[1]; + const size_t H_out = output_shape[2]; + const size_t W_out = output_shape[3]; + + const size_t C_in = input_shape[1]; + const size_t H_in = input_shape[2]; + const size_t W_in = input_shape[3]; + + const size_t K_H = kernel_shape[2]; + const size_t K_W = kernel_shape[3]; + + Tensor<> output(output_shape, 0.0); + + /* + The logic behind is that + Let's us first focus on the first kernel among all out_channel kernels + + Each 
input channel of the data is convolved with the same channel of the kernel, and the result is added to the output + Meaning that each input data channel only corresponds to the same channel of the kernel + + For example, the channel 1 of the input data is convolved with the channel 1 of the kernel, but it will not be convolved with the channel 2 of the kernel + + After each input data channel convolving with the same channel of the kernel, element-wise addition is performed among all the convolved result with the first kernel + + Now we get a single output channel + + We repeat this process for all the out_channel channels + + And finally we will get an output with out_channel channels + */ + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C_out; ++c) + { + for (size_t h = 0; h < H_out; ++h) + { + for (size_t w = 0; w < W_out; ++w) + { + size_t h_start = h * stride.first; + size_t w_start = w * stride.second; + + for (size_t ic = 0; ic < C_in; ++ic) + { + for (size_t kh = 0; kh < K_H; ++kh) + { + for (size_t kw = 0; kw < K_W; ++kw) + { + size_t h_in = h_start + kh * dilation.first; + size_t w_in = w_start + kw * dilation.second; + + if (h_in >= 0 && h_in < H_in && w_in >= 0 && w_in < W_in) + { + output[b, c, h, w] += input[b, ic, h_in, w_in] * kernel[c, ic, kh, kw]; + } + } + } + } + + if (use_bias) + { + output[b, c, h, w] += bias[c]; + } + } + } + } + } + + return output; +} + +/** + * Calculate the output shape of a 2D convolutional layer. + * + * @param input_shape The shape of the input tensor, which is a 4D tensor with shape (B, C_in, H_in, W_in). + * @param out_channel The number of output channels. + * @param kernel_size The size of the kernel, which is a 2D integer pair. + * @param stride The stride of the convolution, which is a 2D integer pair. + * @param padding The padding of the convolution, which is a 2D integer pair. + * @param dilation The dilation of the convolution, which is a 2D integer pair. + * @return The output shape, which is a 4D vector with shape (B, out_channel, H_out, W_out). + * + * @throws std::invalid_argument if input_shape is not 4D or if the output shape is invalid. 
+ */ +const vector calculate_output_shape(const vector &input_shape, const int64_t out_channel, const size_tp2 &kernel_size, const size_tp2 &stride, const size_tp2 &padding, const size_tp2 &dilation) +{ + if (input_shape.size() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + + const size_t B = input_shape[0]; + const size_t H_in = input_shape[2]; + const size_t W_in = input_shape[3]; + + cout << "Btach Size : " << B << endl; + cout << "H_in : " << H_in << endl; + cout << "W_in : " << W_in << endl; + cout << "Out Channel : " << out_channel << endl; + cout << "Kernel Size : " << kernel_size.first << ", " << kernel_size.second << endl; + cout << "Stride : " << stride.first << ", " << stride.second << endl; + cout << "Padding : " << padding.first << ", " << padding.second << endl; + cout << "Dilation : " << dilation.first << ", " << dilation.second << endl; + + const int64_t H_out = (H_in + 2 * padding.first - dilation.first * (kernel_size.first - 1) - 1) / stride.first + 1; + const int64_t W_out = (W_in + 2 * padding.second - dilation.second * (kernel_size.second - 1) - 1) / stride.second + 1; + + if (H_out <= 0 || W_out <= 0) + { + throw std::invalid_argument("Invalid output shape"); + } + + return {B, (size_t)out_channel, (size_t)H_out, (size_t)W_out}; +} + +Tensor<> flip_vertical_and_horizontal(const Tensor<> &input) +{ + if (input.ndim() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + + Tensor<> output = input; + + const size_t B = input.shapes()[0]; + const size_t C = input.shapes()[1]; + const size_t H = input.shapes()[2]; + const size_t W = input.shapes()[3]; + + double cache; + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C; ++c) + { + for (size_t h = 0; h < H / 2; ++h) + { + for (size_t w = 0; w < W; ++w) + { + cache = output[b, c, h, w]; + output[b, c, h, w] = output[b, c, H - h - 1, w]; + output[b, c, H - h - 1, w] = cache; + } + } + for (size_t h = 0; h < H; ++h) + { + for (size_t w = 0; w < W / 2; ++w) + { + cache = output[b, c, h, w]; + output[b, c, h, w] = output[b, c, h, W - w - 1]; + output[b, c, h, W - w - 1] = cache; + } + } + } + } + + return output; +} +Tensor<> dilate_input(const Tensor<> &input, const size_tp2 &dilation) +{ + if (input.ndim() != 4) + { + throw std::invalid_argument("Input shape must be 4D"); + } + + const size_t B = input.shapes()[0]; + const size_t C = input.shapes()[1]; + const size_t H = input.shapes()[2]; + const size_t W = input.shapes()[3]; + + const size_t H_dilated = H + (H - 1) * (dilation.first - 1); + const size_t W_dilated = W + (W - 1) * (dilation.second - 1); + + Tensor<> dilated_input({B, C, H_dilated, W_dilated}, 0.0); + + for (size_t b = 0; b < B; ++b) + { + for (size_t c = 0; c < C; ++c) + { + for (size_t h = 0; h < H; ++h) + { + for (size_t w = 0; w < W; ++w) + { + dilated_input[b, c, h * dilation.first, w * dilation.second] = input[b, c, h, w]; + } + } + } + } + + return dilated_input; +} \ No newline at end of file diff --git a/src/utils/tensor_utils.cpp b/src/utils/tensor_utils.cpp index 1669905..0baa8a6 100644 --- a/src/utils/tensor_utils.cpp +++ b/src/utils/tensor_utils.cpp @@ -55,7 +55,6 @@ vector apply_slice(const Slice& slice, size_t dim_size) { // cout << "start applying slice" << endl; for (size_t i = start; i < stop; i += step) { - // cout << "i: " << i << endl; indices.push_back(i); } return indices; diff --git a/src/utils/utils.cpp b/src/utils/utils.cpp index 6a90dfe..e4de46b 100644 --- a/src/utils/utils.cpp +++ b/src/utils/utils.cpp @@ -1,15 +1,17 @@ 
#include "utils.hpp" -void print_training_stats(int batch, float loss, float accuracy) { +void print_stats(int batch, float loss, float accuracy) +{ cout << "\rBatch " << setw(4) << batch << " " - << "Loss: " << fixed << setprecision(5) << setw(8) << loss << " " - << "Accuracy: " << fixed << setprecision(2) << setw(6) << accuracy * 100 << "%" - << flush; + << "Loss: " << fixed << setprecision(5) << setw(8) << loss << " " + << "Accuracy: " << fixed << setprecision(2) << setw(6) << accuracy * 100 << "%" + << flush; } -void print_training_stats_line(int batch, float loss, float accuracy) { +void print_stats_line(int batch, float loss, float accuracy) +{ cout << "Batch " << setw(4) << batch << " " - << "Loss: " << fixed << setprecision(5) << setw(8) << loss << " " - << "Accuracy: " << fixed << setprecision(2) << setw(6) << accuracy * 100 << "%" - << endl; + << "Loss: " << fixed << setprecision(5) << setw(8) << loss << " " + << "Accuracy: " << fixed << setprecision(2) << setw(6) << accuracy * 100 << "%" + << endl; } \ No newline at end of file diff --git a/tests/core/tensor_test.cpp b/tests/core/tensor_test.cpp index 955ecb0..9fb0c31 100644 --- a/tests/core/tensor_test.cpp +++ b/tests/core/tensor_test.cpp @@ -3,12 +3,14 @@ #include "tensor.hpp" #include "math.h" -TEST_CASE("TensorTest - Constructor and Destructor") { +TEST_CASE("TensorTest - Constructor and Destructor") +{ Tensor<> tensor; // No explicit assertions needed, just verify no crashes } -TEST_CASE("TensorTest - Scaler Constructor") { +TEST_CASE("TensorTest - Scaler Constructor") +{ Tensor<> tensor(10.0f); CHECK(tensor.ndim() == 1); CHECK(tensor.size() == 1); @@ -16,7 +18,8 @@ TEST_CASE("TensorTest - Scaler Constructor") { CHECK(tensor[0] == 10); } -TEST_CASE("TensorTest - 1D Tensor Constructor from initializer_list") { +TEST_CASE("TensorTest - 1D Tensor Constructor from initializer_list") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; CHECK(tensor_1d.ndim() == 1); CHECK(tensor_1d.size() == 4); @@ -33,7 +36,8 @@ TEST_CASE("TensorTest - 1D Tensor Constructor from initializer_list") { CHECK(tensor_1d_1val[0] == 0.0f); } -TEST_CASE("TensorTest - 2D Tensor Constructor from initializer_list") { +TEST_CASE("TensorTest - 2D Tensor Constructor from initializer_list") +{ Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; CHECK(tensor_2d.ndim() == 2); CHECK(tensor_2d.size() == 4); @@ -43,7 +47,7 @@ TEST_CASE("TensorTest - 2D Tensor Constructor from initializer_list") { CHECK(tensor_2d[0, 1] == 2.0f); CHECK(tensor_2d[1, 0] == 3.0f); CHECK(tensor_2d[1, 1] == 4.0f); - + Tensor<> tensor_2d_1row = {{0.0f, 0.0f}}; CHECK(tensor_2d_1row.ndim() == 2); CHECK(tensor_2d_1row.size() == 2); @@ -61,7 +65,8 @@ TEST_CASE("TensorTest - 2D Tensor Constructor from initializer_list") { // CHECK(tensor_2d_1col[1, 0] == 0.0f); } -TEST_CASE("TensorTest - 3D Tensor Constructor from initializer_list") { +TEST_CASE("TensorTest - 3D Tensor Constructor from initializer_list") +{ Tensor<> tensor = {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}; CHECK(tensor.ndim() == 3); CHECK(tensor.size() == 8); @@ -87,7 +92,8 @@ TEST_CASE("TensorTest - 3D Tensor Constructor from initializer_list") { CHECK(tensor2[1, 1, 1] == 0.0f); } -TEST_CASE("TensorTest - 1D Tensor Constructor from vector") { +TEST_CASE("TensorTest - 1D Tensor Constructor from vector") +{ vector data = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> tensor1 = data; CHECK(tensor1.ndim() == 1); @@ -106,7 +112,8 @@ TEST_CASE("TensorTest - 1D Tensor Constructor from vector") { CHECK(tensor2[0] == 0.0f); } 
-TEST_CASE("TensorTest - 2D Tensor Constructor from vector") { +TEST_CASE("TensorTest - 2D Tensor Constructor from vector") +{ vector> data = {{1.0f, 2.0f}, {3.0f, 4.0f}}; Tensor<> tensor = data; CHECK(tensor.ndim() == 2); @@ -117,7 +124,7 @@ TEST_CASE("TensorTest - 2D Tensor Constructor from vector") { CHECK(tensor[0, 1] == 2.0f); CHECK(tensor[1, 0] == 3.0f); CHECK(tensor[1, 1] == 4.0f); - + vector> data2 = {{0.0f, 0.0f}}; Tensor<> tensor2 = data2; CHECK(tensor2.ndim() == 2); @@ -128,7 +135,8 @@ TEST_CASE("TensorTest - 2D Tensor Constructor from vector") { CHECK(tensor2[0, 1] == 0.0f); } -TEST_CASE("TensorTest - 3D Tensor Constructor from vector") { +TEST_CASE("TensorTest - 3D Tensor Constructor from vector") +{ vector>> data = {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}; Tensor<> tensor = data; CHECK(tensor.ndim() == 3); @@ -156,7 +164,8 @@ TEST_CASE("TensorTest - 3D Tensor Constructor from vector") { CHECK(tensor2[1, 1, 1] == 0.0f); } -TEST_CASE("TensorTest - 4D Tensor Constructor from vector") { +TEST_CASE("TensorTest - 4D Tensor Constructor from vector") +{ vector>>> data = {{{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}, {{{9.0f, 10.0f}, {11.0f, 12.0f}}, {{13.0f, 14.0f}, {15.0f, 16.0f}}}}; Tensor<> tensor = data; CHECK(tensor.ndim() == 4); @@ -172,7 +181,8 @@ TEST_CASE("TensorTest - 4D Tensor Constructor from vector") { CHECK(tensor[1, 1, 1, 1] == 16.0f); } -TEST_CASE("TensorTest - Copy Constructor") { +TEST_CASE("TensorTest - Copy Constructor") +{ // 1D tensor Tensor<> tensor1 = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> test_tensor = tensor1; @@ -219,7 +229,8 @@ TEST_CASE("TensorTest - Copy Constructor") { CHECK(test_tensor[1, 1, 1] == 8.0f); } -TEST_CASE("TensorTest - Move Constructor") { +TEST_CASE("TensorTest - Move Constructor") +{ // 1D tensor Tensor<> tensor1 = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> test_tensor = std::move(tensor1); @@ -266,7 +277,8 @@ TEST_CASE("TensorTest - Move Constructor") { CHECK(test_tensor[1, 1, 1] == 8.0f); } -TEST_CASE("TensorTest - Certain Value Constructor") { +TEST_CASE("TensorTest - Certain Value Constructor") +{ Tensor<> tensor_1d({1}, 0.0f); CHECK(tensor_1d.ndim() == 1); CHECK(tensor_1d.size() == 1); @@ -283,7 +295,6 @@ TEST_CASE("TensorTest - Certain Value Constructor") { CHECK(tensor_2d[1, 0] == 10.0f); CHECK(tensor_2d[1, 1] == 10.0f); - Tensor<> tensor_3d({2, 2, 2}, 5.0f); CHECK(tensor_3d.ndim() == 3); CHECK(tensor_3d.size() == 8); @@ -297,7 +308,8 @@ TEST_CASE("TensorTest - Certain Value Constructor") { CHECK(tensor_3d[1, 1, 1] == 5.0f); } -TEST_CASE("TensorTest - Indexing Operator") { +TEST_CASE("TensorTest - Indexing Operator") +{ Tensor<> tensor = {1.0f, 2.0f, 3.0f, 4.0f}; CHECK(tensor[0] == 1.0f); CHECK(tensor[1] == 2.0f); @@ -318,7 +330,8 @@ TEST_CASE("TensorTest - Indexing Operator") { CHECK(tensor[1, 1, 1] == 8.0f); } -TEST_CASE("TensorTest - Indexing Operator - Out of Bound") { +TEST_CASE("TensorTest - Indexing Operator - Out of Bound") +{ Tensor<> tensor = {1.0f, 2.0f, 3.0f, 4.0f}; CHECK_THROWS(tensor[4]); @@ -332,7 +345,8 @@ TEST_CASE("TensorTest - Indexing Operator - Out of Bound") { CHECK_THROWS(tensor[0, 0, 2]); } -TEST_CASE("TensorTest - Indexing Operator - Negative Indexing") { +TEST_CASE("TensorTest - Indexing Operator - Negative Indexing") +{ Tensor<> tensor = {1.0f, 2.0f, 3.0f, 4.0f}; CHECK(tensor[-1] == 4.0f); CHECK(tensor[-2] == 3.0f); @@ -350,7 +364,8 @@ TEST_CASE("TensorTest - Indexing Operator - Negative Indexing") { CHECK(tensor[0, -1, 0] == 3.0f); } -TEST_CASE("TensorTest - Indexing Operator - 
Normal Slicing") { +TEST_CASE("TensorTest - Indexing Operator - Normal Slicing") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> sliced_tensor_1d_1 = tensor_1d.index({":2"}); CHECK(sliced_tensor_1d_1.ndim() == 1); @@ -391,7 +406,8 @@ TEST_CASE("TensorTest - Indexing Operator - Normal Slicing") { CHECK(sliced_tensor_2d_1[1, 1] == 4.0f); } -TEST_CASE("TensorTest - Transpose") { +TEST_CASE("TensorTest - Transpose") +{ Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; Tensor<> transposed_tensor_2d = tensor_2d.transpose(); CHECK(transposed_tensor_2d.ndim() == 2); @@ -415,50 +431,53 @@ TEST_CASE("TensorTest - Transpose") { CHECK(transposed_tensor_1d[-1, -1] == 4.0f); } -TEST_CASE("TensorTest - flatten") { +TEST_CASE("TensorTest - flatten") +{ Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; - tensor_2d.flatten(); - CHECK(tensor_2d.ndim() == 1); - CHECK(tensor_2d.size() == 4); - CHECK(tensor_2d.shapes()[0] == 4); - CHECK(tensor_2d[0] == 1.0f); - CHECK(tensor_2d[1] == 2.0f); - CHECK(tensor_2d[2] == 3.0f); - CHECK(tensor_2d[3] == 4.0f); + Tensor<> flattened_tensor_2d = tensor_2d.flatten(); + CHECK(flattened_tensor_2d.ndim() == 1); + CHECK(flattened_tensor_2d.size() == 4); + CHECK(flattened_tensor_2d.shapes()[0] == 4); + CHECK(flattened_tensor_2d[0] == 1.0f); + CHECK(flattened_tensor_2d[1] == 2.0f); + CHECK(flattened_tensor_2d[2] == 3.0f); + CHECK(flattened_tensor_2d[3] == 4.0f); } -TEST_CASE("TensorTest - reshape") { +TEST_CASE("TensorTest - reshape") +{ Tensor<> tensor_2d = {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}}; - tensor_2d.reshape({2, 3, 2}); - CHECK(tensor_2d.ndim() == 3); - CHECK(tensor_2d.size() == 12); - CHECK(tensor_2d.shapes()[0] == 2); - CHECK(tensor_2d.shapes()[1] == 3); - CHECK(tensor_2d.shapes()[2] == 2); - CHECK(tensor_2d[0, 0, 0] == 1.0f); - CHECK(tensor_2d[0, 0, 1] == 2.0f); - CHECK(tensor_2d[0, 1, 0] == 3.0f); - CHECK(tensor_2d[0, 1, 1] == 4.0f); - CHECK(tensor_2d[0, 2, 0] == 5.0f); - CHECK(tensor_2d[0, 2, 1] == 6.0f); - CHECK(tensor_2d[1, 0, 0] == 7.0f); - CHECK(tensor_2d[-1, -1, -1] == 12.0f); + Tensor<> reshaped_tensor_2d = tensor_2d.reshape({2, 3, 2}); + CHECK(reshaped_tensor_2d.ndim() == 3); + CHECK(reshaped_tensor_2d.size() == 12); + CHECK(reshaped_tensor_2d.shapes()[0] == 2); + CHECK(reshaped_tensor_2d.shapes()[1] == 3); + CHECK(reshaped_tensor_2d.shapes()[2] == 2); + CHECK(reshaped_tensor_2d[0, 0, 0] == 1.0f); + CHECK(reshaped_tensor_2d[0, 0, 1] == 2.0f); + CHECK(reshaped_tensor_2d[0, 1, 0] == 3.0f); + CHECK(reshaped_tensor_2d[0, 1, 1] == 4.0f); + CHECK(reshaped_tensor_2d[0, 2, 0] == 5.0f); + CHECK(reshaped_tensor_2d[0, 2, 1] == 6.0f); + CHECK(reshaped_tensor_2d[1, 0, 0] == 7.0f); + CHECK(reshaped_tensor_2d[-1, -1, -1] == 12.0f); Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - tensor_1d.reshape({2, 3}); - CHECK(tensor_1d.ndim() == 2); - CHECK(tensor_1d.size() == 6); - CHECK(tensor_1d.shapes()[0] == 2); - CHECK(tensor_1d.shapes()[1] == 3); - CHECK(tensor_1d[0, 0] == 1.0f); - CHECK(tensor_1d[0, 1] == 2.0f); - CHECK(tensor_1d[0, 2] == 3.0f); - CHECK(tensor_1d[1, 0] == 4.0f); - CHECK(tensor_1d[1, 1] == 5.0f); - CHECK(tensor_1d[1, 2] == 6.0f); + Tensor<> reshaped_tensor_1d = tensor_1d.reshape({2, 3}); + CHECK(reshaped_tensor_1d.ndim() == 2); + CHECK(reshaped_tensor_1d.size() == 6); + CHECK(reshaped_tensor_1d.shapes()[0] == 2); + CHECK(reshaped_tensor_1d.shapes()[1] == 3); + CHECK(reshaped_tensor_1d[0, 0] == 1.0f); + CHECK(reshaped_tensor_1d[0, 1] == 2.0f); + CHECK(reshaped_tensor_1d[0, 2] == 3.0f); + 
CHECK(reshaped_tensor_1d[1, 0] == 4.0f); + CHECK(reshaped_tensor_1d[1, 1] == 5.0f); + CHECK(reshaped_tensor_1d[1, 2] == 6.0f); } -TEST_CASE("TensorTest - abs") { +TEST_CASE("TensorTest - abs") +{ Tensor<> tensor_2d = {{-1.0f, -2.0f}, {3.0f, 4.0f}}; Tensor<> abs_tensor_2d = tensor_2d.abs(); CHECK(abs_tensor_2d.ndim() == 2); @@ -471,7 +490,8 @@ TEST_CASE("TensorTest - abs") { CHECK(abs_tensor_2d[1, 1] == 4.0f); } -TEST_CASE("TensorTest - sum") { +TEST_CASE("TensorTest - sum") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; double sum_1d = tensor_1d.sum(); CHECK(sum_1d == 10.0f); @@ -485,9 +505,11 @@ TEST_CASE("TensorTest - sum") { CHECK(sum_3d == 36.0f); } -TEST_CASE("TensorTest - filter") { +TEST_CASE("TensorTest - filter") +{ Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; - Tensor<> filtered_tensor_1d = tensor_1d.filter([](double x) { return x < 3.0f; }); + Tensor<> filtered_tensor_1d = tensor_1d.filter([](double x) + { return x < 3.0f; }); CHECK(filtered_tensor_1d.ndim() == 1); CHECK(filtered_tensor_1d.size() == 4); CHECK(filtered_tensor_1d.shapes()[0] == 4); @@ -497,7 +519,8 @@ TEST_CASE("TensorTest - filter") { CHECK(filtered_tensor_1d[3] == 0.0f); Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; - Tensor<> filtered_tensor_2d = tensor_2d.filter([](double x) { return x < 3.0f; }); + Tensor<> filtered_tensor_2d = tensor_2d.filter([](double x) + { return x < 3.0f; }); CHECK(filtered_tensor_2d.ndim() == 2); CHECK(filtered_tensor_2d.size() == 4); CHECK(filtered_tensor_2d.shapes()[0] == 2); @@ -508,7 +531,8 @@ TEST_CASE("TensorTest - filter") { CHECK(filtered_tensor_2d[1, 1] == 0.0f); Tensor<> tensor_3d = {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}; - Tensor<> filtered_tensor_3d = tensor_3d.filter([](double x) { return x < 3.0f; }); + Tensor<> filtered_tensor_3d = tensor_3d.filter([](double x) + { return x < 3.0f; }); CHECK(filtered_tensor_3d.ndim() == 3); CHECK(filtered_tensor_3d.size() == 8); CHECK(filtered_tensor_3d.shapes()[0] == 2); @@ -524,11 +548,13 @@ TEST_CASE("TensorTest - filter") { CHECK(filtered_tensor_3d[1, 1, 1] == 0.0f); } -TEST_CASE("TensorTest - map") { +TEST_CASE("TensorTest - map") +{ double eps = 1e-5f; Tensor<> tensor_1d = {1.0f, 2.0f, 3.0f, 4.0f}; - Tensor<> tensor_1d_exp = tensor_1d.map([](double x) { return exp(x); }); + Tensor<> tensor_1d_exp = tensor_1d.map([](double x) + { return exp(x); }); CHECK(tensor_1d_exp.ndim() == 1); CHECK(tensor_1d_exp.size() == 4); CHECK(tensor_1d_exp.shapes()[0] == 4); @@ -538,7 +564,8 @@ TEST_CASE("TensorTest - map") { CHECK(tensor_1d_exp[3] - exp(4.0f) < eps); Tensor<> tensor_2d = {{1.0f, 2.0f}, {3.0f, 4.0f}}; - Tensor<> tensor_2d_times_10 = tensor_2d.map([](double x) { return x * 10.0f; }); + Tensor<> tensor_2d_times_10 = tensor_2d.map([](double x) + { return x * 10.0f; }); CHECK(tensor_2d_times_10.ndim() == 2); CHECK(tensor_2d_times_10.size() == 4); CHECK(tensor_2d_times_10.shapes()[0] == 2); @@ -549,7 +576,8 @@ TEST_CASE("TensorTest - map") { CHECK(tensor_2d_times_10[1, 1] == 40.0f); Tensor<> tensor_3d = {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}; - Tensor<> tensor_3d_log = tensor_3d.map([](double x) { return log(x); }); + Tensor<> tensor_3d_log = tensor_3d.map([](double x) + { return log(x); }); CHECK(tensor_3d_log.ndim() == 3); CHECK(tensor_3d_log.size() == 8); CHECK(tensor_3d_log.shapes()[0] == 2); @@ -565,7 +593,8 @@ TEST_CASE("TensorTest - map") { CHECK(tensor_3d_log[1, 1, 1] - log(8.0f) < eps); } -TEST_CASE("TensorTest - equal") { +TEST_CASE("TensorTest - equal") +{ Tensor<> tensor_1d = 
{1.0f, 2.0f, 3.0f, 4.0f}; Tensor<> another_tensor_1d = {1.0f, 2.0f, 5.0f, 4.0f}; Tensor equal_tensor_1d = tensor_1d.equal(another_tensor_1d); @@ -607,7 +636,8 @@ TEST_CASE("TensorTest - equal") { CHECK(equal_tensor_3d[1, 1, 1] == 1); } -TEST_CASE("TensorTest - Matrix Multiplication") { +TEST_CASE("TensorTest - Matrix Multiplication") +{ Tensor<> tensor_2d_1 = {{1.0f, 2.0f}, {3.0f, 4.0f}}; Tensor<> transposed_tensor_2d_1 = tensor_2d_1.transpose(); Tensor<> matrix_multiplication_2d_1 = tensor_2d_1.matmul(transposed_tensor_2d_1); @@ -620,5 +650,4 @@ TEST_CASE("TensorTest - Matrix Multiplication") { CHECK(matrix_multiplication_2d_1[0, 1] == 11.0f); CHECK(matrix_multiplication_2d_1[1, 0] == 11.0f); CHECK(matrix_multiplication_2d_1[1, 1] == 25.0f); - } \ No newline at end of file
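
The flatten and reshape hunks above change both methods from mutating the tensor in place to returning a new Tensor<>. A minimal usage sketch of that contract follows; it relies only on the members exercised by the tests (flatten, reshape, ndim, shapes, the multi-argument operator[]). The header name tensor.hpp (inferred from src/core/tensor.cpp and the flat include style of the examples), the variable names, and the claim that the source tensor is left unchanged are assumptions, not facts established by this diff.

// Sketch of the out-of-place flatten()/reshape() usage implied by the updated tests.
// Assumption: the Tensor<> class is declared in "tensor.hpp"; the tests only inspect
// the returned tensor, so "t is left unchanged" is an assumption of this sketch.
#include "tensor.hpp"
#include <iostream>
using namespace nn;

int main()
{
    Tensor<> t = {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}};

    // flatten() returns a 1-D tensor rather than flattening t in place
    Tensor<> flat = t.flatten();
    std::cout << flat.ndim() << " " << flat.shapes()[0] << std::endl; // expected: 1 6

    // reshape() likewise returns the reshaped tensor
    Tensor<> reshaped = t.reshape({3, 2});
    std::cout << reshaped[2, 1] << std::endl; // expected: 6 (last element in row-major order)

    return 0;
}

The sketch assumes the same C++23 multi-argument operator[] that the tests already use for element access.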