Dev #14

Merged: 17 commits, May 15, 2025

4 changes: 3 additions & 1 deletion CMakeLists.txt
@@ -20,6 +20,7 @@ endif()
include_directories(
include/
include/core
include/modules/containers
include/modules/layers
include/modules/activations
include/modules/losses
@@ -34,6 +35,7 @@ set(SOURCE_FILES
src/core/tensor.cpp
src/utils/tensor_utils.cpp
src/core/module.cpp
src/modules/containers/sequential.cpp
src/modules/layers/linear.cpp
src/modules/layers/conv2d.cpp
src/modules/layers/flatten.cpp
@@ -54,7 +56,7 @@ set(SOURCE_FILES
add_library(neuralnet ${SOURCE_FILES})

# Add the executable for the main example
add_executable(main examples/test_conv2d.cpp)
add_executable(main examples/main.cpp)
target_link_libraries(main neuralnet)

# Only build tests if BUILD_TESTS is ON
39 changes: 38 additions & 1 deletion README.md
@@ -32,6 +32,8 @@ brew install cmake
brew install gcc
```

If you run into any problems, try uninstalling `cmake` and `gcc` and then reinstalling them.

For **Linux**, run the following commands:

```bash
@@ -65,6 +67,41 @@ I implemented a tensor from scratch as well and integrate it to my neural networ

For more details about tensors, please refer to the [tensor tutorial](docs/tensor.md).

## Sequential Container

To create your own neural network simply by stacking layers, feel free to use [`Sequential`](include/modules/containers/sequential.hpp). It is similar to Keras's `Sequential` (although this repo is meant to be a PyTorch-like implementation :) ).

### Example Usage

```cpp
#include "sequential.hpp"
#include "module.hpp"
#include "linear.hpp"
#include "relu.hpp"
#include "dropout.hpp"
#include <vector>
using namespace std;
using namespace nn;

vector<Module*> layers = { new Linear(768, 256),
                           new ReLU(),
                           new Dropout(0.2),
                           new Linear(256, 128),
                           new ReLU(),
                           new Dropout(0.2),
                           new Linear(128, 10) };

Sequential container = layers;

/*
To perform a forward pass, simply do 'output = container(input)'.

Similarly, do 'container.backward(grad_output)' to perform a backward pass.

For more details, please check main.cpp in examples.
*/
```
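
A rough, self-contained sketch of the call pattern is shown below. The layer sizes, the input shape, and the constant-filled tensors are made up for illustration, and the `Tensor<>(shape, value)` constructor is borrowed from `examples/test_conv2d.cpp`; in a real run the upstream gradient would come from a loss such as `CrossEntropyLoss` (see `examples/main.cpp`).

```cpp
#include "sequential.hpp"
#include "module.hpp"
#include "linear.hpp"
#include "relu.hpp"
#include "tensor.hpp"
#include <vector>
using namespace std;
using namespace nn;

int main()
{
    // A small stack, just to show the call pattern.
    vector<Module*> layers = { new Linear(4, 8),
                               new ReLU(),
                               new Linear(8, 3) };
    Sequential container = layers;

    // Hypothetical batch of 2 samples with 4 features each, filled with a constant.
    Tensor<> input = Tensor<>({2, 4}, 0.5f);

    // Forward pass: layers are applied in the order they were registered.
    Tensor<> output = container(input); // shape: (2, 3)

    // Backward pass: gradients flow through the layers in reverse order.
    // The upstream gradient is a placeholder here; in practice it comes from a loss.
    Tensor<> grad_output = Tensor<>({2, 3}, 1.0f);
    Tensor<> grad_input = container.backward(grad_output);

    return 0;
}
```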

## Module API

The module API is defined in [`include/core/module.hpp`](include/core/module.hpp).
@@ -74,7 +111,7 @@ To build your custom module, follow the instructions in `include/core/module.hpp
### Example usage

```cpp
#include <module.hpp>
#include "module.hpp"
using namespace nn;

// Your code here
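
// Purely as a hedged sketch (not the exact interface; check
// include/core/module.hpp for the real signatures): a custom module is
// presumed to derive from Module and provide forward/backward, mirroring
// how Linear and ReLU are used in the examples. The element-wise `map`
// call is the one documented in docs/tensor.md.
class Scale : public Module
{
public:
    explicit Scale(float factor) : factor_(factor) {}

    Tensor<> forward(const Tensor<>& input)
    {
        // y = factor * x, element-wise
        return input.map([f = factor_](float x) { return x * f; });
    }

    Tensor<> backward(const Tensor<>& grad_output)
    {
        // dL/dx = factor * dL/dy
        return grad_output.map([f = factor_](float x) { return x * f; });
    }

private:
    float factor_;
};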
35 changes: 29 additions & 6 deletions docs/tensor.md
@@ -26,7 +26,7 @@ You can create your tensor from C++ array, or using `vector` in C++ STL. You can
```cpp
#include "tensor.hpp"

// default type is double
// default type is float
Tensor<> your_tensor = { { 1.2, 2.3, 3.4 }, { 4.5, 5.6, 6.7 } }; // shape: (2, 3)

// Or you can create a tensor with a specific type
@@ -36,7 +36,7 @@ Tensor<int> your_int_tensor = { { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } } // shape
Tensor<> transposed_tensor = your_tensor.transpose(); // shape: (3, 2)

// You can also create a tensor from a vector
vector<vector<double>> your_vec = { { 1.2, 2.3, 3.4 }, { 4.5, 5.6, 6.7 } };
vector<vector<float>> your_vec = { { 1.2, 2.3, 3.4 }, { 4.5, 5.6, 6.7 } };
Tensor<> your_tensor_from_vec = Tensor<>(your_vec);
```

Expand Down Expand Up @@ -172,7 +172,7 @@ Tensor<int> A = { { 1, 2, 3 },

Tensor<float> A_float = A.dtype<float>();

Tensor<> A_double = A.dtype<double>(); // since the default type of tensor is double
Tensor<> A_default = A.dtype<float>(); // Tensor<> also works here, since the default tensor type is float
```

## Filter the unwanted elements
@@ -200,7 +200,7 @@ Function mapping also can be applied to the tensor, simply by using `map`. It ta
Tensor<> A = { { 1, 2, 3 },
{ 4, 5, 6 } }; // 2 x 3

Tensor<> A_mapped = A.map([](double x) { return exp(x); });
Tensor<> A_mapped = A.map([](float x) { return exp(x); });
/*
{ { 2.71828, 7.38906, 20.0855 },
{ 54.5982, 148.413, 403.429 } }
@@ -245,12 +245,35 @@ Tensor<size_t> tensor_1d_argmax = tensor_1d.argmin();

## Flatten tensor

You can flatten your tensor using `flatten` function. It returns a 1-D tensor.
You can flatten your tensor using the `flatten` function. It flattens the dimensions of the tensor from `start_dim` to `end_dim` into a single dimension. The defaults for `start_dim` and `end_dim` are 0 and -1, respectively.

```cpp
Tensor<int> A = { { 1, 2, 3 },
{ 4, 5, 6 } }; // 2 x 3

Tensor<int> A_flatten = A.flatten();
// { 1, 2, 3, 4, 5, 6 }
// [ 1, 2, 3, 4, 5, 6 ]

Tensor<> B_3d = { { { -1, -2, -3 },
                    { -4, -5, -6 } },
                  { {  1,  2,  3 },
                    {  4,  5,  6 } } }; // 2 x 2 x 3

Tensor<> B_flatten_12 = B_3d.flatten(0, 1); // flatten the first and second dimensions
/*
[
[-1, -2, -3],
[-4, -5, -6],
[1, 2, 3],
[4, 5, 6]
]
*/

Tensor<> B_flatten_23 = B_3d.flatten(1, 2); // flatten the second and third (last) dimensions
/*
[
[-1, -2, -3, -4, -5, -6],
[1, 2, 3, 4, 5, 6]
]
*/
```
35 changes: 20 additions & 15 deletions examples/main.cpp
@@ -11,10 +11,10 @@ int main()

// Define the hyperparameters

const double LR = 0.01;
const double EPOCH = 10;
const double BATCH_SIZE = 64;
const double DROPOUT_P = 0.3;
const float LR = 0.01;
const float EPOCH = 10;
const float BATCH_SIZE = 64;
const float DROPOUT_P = 0.3;

MNIST dataset(BATCH_SIZE);

@@ -29,7 +29,8 @@ int main()
}

// Initialize the model
MLP model = MLP({784, 128, 64, 10}, DROPOUT_P);
bool bias = true;
MLP model = MLP(784, {128, 64, 10}, bias, DROPOUT_P);

cout << "Finished model initialization" << endl;

@@ -38,14 +39,15 @@ int main()

cout << "Finished loss initialization" << endl;

double loss = 0.0;
double acc = 0.0;
vector<double> loss_list;
vector<double> accuracy_list;
float loss = 0.0;
float acc = 0.0;
vector<float> loss_list;
vector<float> accuracy_list;

cout << "Training started..." << endl;

// // Train the model
// ============================ Training ====================================

// Example of iterating through all batches
for (size_t e = 0; e < EPOCH; e++)
{
@@ -77,16 +79,16 @@ int main()
print_stats_line(i, loss, acc);
}

double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
double total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;
float total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
float total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;

cout << "------------------------------------" << endl;
cout << "Total Loss in Epoch " << e + 1 << " = " << total_loss << "" << endl;
cout << "Total Accuracy in Epoch " << e + 1 << " = " << total_acc << "%" << endl;
cout << "------------------------------------" << endl;
}

// Inference
// ============================ Inference ====================================

model.eval();

@@ -127,8 +129,11 @@ int main()
print_stats_line(i, loss, acc);
}

double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
double total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;
float total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
float total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;

cout << "Average Loss on Test Data = " << total_loss << "" << endl;
cout << "Average Accuracy on Test Data = " << total_acc << "%" << endl;

cout << "------------------------------------" << endl;

4 changes: 2 additions & 2 deletions examples/test_conv2d.cpp
@@ -32,7 +32,7 @@ int main()
Tensor<> test_weight = Tensor<>({out_channels, in_channels, weight_size, weight_size}, 0.0f);
Tensor<> test_bias = Tensor<>({out_channels}, 0.0f);

double val = 0.01;
float val = 0.01;
for (size_t i = 0; i < out_channels; i++)
{
for (size_t j = 0; j < in_channels; j++)
@@ -209,7 +209,7 @@ int main()

output_3 /= 1e6;

double loss = cross_entropy(output_3, labels);
float loss = cross_entropy(output_3, labels);

cout << "Loss: " << loss << endl;

46 changes: 21 additions & 25 deletions examples/test_linear.cpp
@@ -5,34 +5,32 @@
#include "dropout.hpp"
using namespace nn;

int main() {
const bool bias = true;
int main()
{
const bool bias = true;

Linear linear_1(3, 5, bias);
Linear linear_2(5, 7, bias);
Dropout dropout(0.3);
// Dropout dropout(0.3);

Tensor<> specific_weights_1 = {
{0.1, 0.4, 0.7, 1.0, 1.3},
{0.2, 0.5, 0.8, 1.1, 1.4},
{0.3, 0.6, 0.9, 1.2, 1.5}
};

{0.3, 0.6, 0.9, 1.2, 1.5}};

Tensor<> specific_weights_2 = {
{0.1, 0.6, 1.1, 1.6, 2.1, 2.6, 3.1},
{0.2, 0.7, 1.2, 1.7, 2.2, 2.7, 3.2},
{0.3, 0.8, 1.3, 1.8, 2.3, 2.8, 3.3},
{0.4, 0.9, 1.4, 1.9, 2.4, 2.9, 3.4},
{0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5}
};
{0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5}};

Tensor<> specific_bias_1 = {
0.1,
0.2,
0.3,
0.4,
0.5
};
0.5};

Tensor<> specific_bias_2 = {
0.1,
@@ -41,15 +39,13 @@ int main() {
0.4,
0.5,
0.6,
0.7
};
0.7};

Tensor<> input = {
{1.1f, 2.1f, 3.1f},
{4.1f, 5.1f, 6.1f},
{7.1f, 8.1f, 9.1f},
{10.1f, 11.1f, 12.1f}
};
{10.1f, 11.1f, 12.1f}};

cout << "After initialization: " << endl;

@@ -62,17 +58,17 @@ int main() {
cout << "bias 2: " << endl;
specific_bias_2.print();

linear_1.setWeights(specific_weights_1);
linear_2.setWeights(specific_weights_2);
linear_1.set_weight(specific_weights_1);
linear_2.set_weight(specific_weights_2);

linear_1.setBiases(specific_bias_1);
linear_2.setBiases(specific_bias_2);
linear_1.set_bias(specific_bias_1);
linear_2.set_bias(specific_bias_2);

cout << endl;

Tensor<> output_1 = linear_1(input);
Tensor<> output_2 = dropout(output_1);
Tensor<> Y_hat = linear_2(output_2);
// Tensor<> output_2 = dropout(output_1);
Tensor<> Y_hat = linear_2(output_1);

cout << "Y_hat: " << endl;
Y_hat.print();
@@ -83,8 +79,9 @@ int main() {
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0},
{0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0}
};
{0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0}};

Tensor<> Y_not_one_hot = {6, 4, 3, 2};

MSE mse;
CrossEntropyLoss cross_entropy_loss;
@@ -96,9 +93,8 @@ int main() {

Tensor<> dL_dZ = cross_entropy_loss.backward();
Tensor<> dL_dY_dot = linear_2.backward(dL_dZ);
Tensor<> dL_dY = dropout.backward(dL_dY_dot);
Tensor<> dL_dX = linear_1.backward(dL_dY);

// Tensor<> dL_dY = dropout.backward(dL_dY_dot);
Tensor<> dL_dX = linear_1.backward(dL_dY_dot);

// ===================softmax=====================

2 changes: 1 addition & 1 deletion examples/test_softmax.cpp
@@ -69,7 +69,7 @@ int main() {
Tensor<> output_softmax = softmax.forward(output_1);
Tensor<> output_2 = linear_2.forward(output_softmax);

double cross_entropy_loss = criterion.forward(output_2, label);
float cross_entropy_loss = criterion.forward(output_2, label);

cout << "cross entropy loss: " << cross_entropy_loss << endl;
