Dev #14

Merged: 17 commits, May 15, 2025

4 changes: 3 additions & 1 deletion CMakeLists.txt
@@ -20,6 +20,7 @@ endif()
include_directories(
include/
include/core
include/modules/containers
include/modules/layers
include/modules/activations
include/modules/losses
@@ -34,6 +35,7 @@ set(SOURCE_FILES
src/core/tensor.cpp
src/utils/tensor_utils.cpp
src/core/module.cpp
src/modules/containers/sequential.cpp
src/modules/layers/linear.cpp
src/modules/layers/conv2d.cpp
src/modules/layers/flatten.cpp
@@ -54,7 +56,7 @@ set(SOURCE_FILES
add_library(neuralnet ${SOURCE_FILES})

# Add the executable for the main example
add_executable(main examples/test_conv2d.cpp)
add_executable(main examples/main.cpp)
target_link_libraries(main neuralnet)

# Only build tests if BUILD_TESTS is ON
39 changes: 38 additions & 1 deletion README.md
@@ -32,6 +32,8 @@ brew install cmake
brew install gcc
```

If you run into any problems, try uninstalling `cmake` and `gcc` and then reinstalling them.

For **Linux**, run the following commands:

```bash
@@ -65,6 +67,41 @@ I implemented a tensor from scratch as well and integrate it to my neural networ

For more details about tensors, please refer to the [tensor tutorial](docs/tensor.md).

## Sequential Container

To create your own neural network simply by stacking layers, feel free to use [`Sequential`](include/modules/containers/sequential.hpp). It is similar to Keras's `Sequential` (although this repo is meant to be a PyTorch-like implementation :) ).

### Example Usage

```cpp
#include "sequential.hpp"
#include "module.hpp"
#include "linear.hpp"
#include "relu.hpp"
#include "dropout.hpp"
#include <vector>
using namespace std;
using namespace nn;

vector<Module*> layers = { new Linear(768, 256),
                           new ReLU(),
                           new Dropout(0.2),
                           new Linear(256, 128),
                           new ReLU(),
                           new Dropout(0.2),
                           new Linear(128, 10) };

Sequential container = layers;

/*
To perform a forward pass, simply do 'output = container(input)'.

Similarly, do 'container.backward(grad_output)' to perform a backward pass.

For more details, please check main.cpp in examples.
*/
```
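
A rough, self-contained sketch of the call pattern is shown below. The layer sizes, the input shape, and the constant-filled tensors are made up for illustration, and the `Tensor<>(shape, value)` constructor is borrowed from `examples/test_conv2d.cpp`; in a real run the upstream gradient would come from a loss such as `CrossEntropyLoss` (see `examples/main.cpp`).

```cpp
#include "sequential.hpp"
#include "module.hpp"
#include "linear.hpp"
#include "relu.hpp"
#include "tensor.hpp"
#include <vector>
using namespace std;
using namespace nn;

int main()
{
    // A small stack, just to show the call pattern.
    vector<Module*> layers = { new Linear(4, 8),
                               new ReLU(),
                               new Linear(8, 3) };
    Sequential container = layers;

    // Hypothetical batch of 2 samples with 4 features each, filled with a constant.
    Tensor<> input = Tensor<>({2, 4}, 0.5f);

    // Forward pass: layers are applied in the order they were registered.
    Tensor<> output = container(input); // shape: (2, 3)

    // Backward pass: gradients flow through the layers in reverse order.
    // The upstream gradient is a placeholder here; in practice it comes from a loss.
    Tensor<> grad_output = Tensor<>({2, 3}, 1.0f);
    Tensor<> grad_input = container.backward(grad_output);

    return 0;
}
```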

## Module API

The module API is defined in [`include/core/module.hpp`](include/core/module.hpp).
@@ -74,7 +111,7 @@ To build your custom module, follow the instructions in `include/core/module.hpp
### Example usage

```cpp
#include <module.hpp>
#include "module.hpp"
using namespace nn;

// Your code here
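
// Purely as a hedged sketch (not the exact interface; check
// include/core/module.hpp for the real signatures): a custom module is
// presumed to derive from Module and provide forward/backward, mirroring
// how Linear and ReLU are used in the examples. The element-wise `map`
// call is the one documented in docs/tensor.md.
class Scale : public Module
{
public:
    explicit Scale(float factor) : factor_(factor) {}

    Tensor<> forward(const Tensor<>& input)
    {
        // y = factor * x, element-wise
        return input.map([f = factor_](float x) { return x * f; });
    }

    Tensor<> backward(const Tensor<>& grad_output)
    {
        // dL/dx = factor * dL/dy
        return grad_output.map([f = factor_](float x) { return x * f; });
    }

private:
    float factor_;
};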
35 changes: 29 additions & 6 deletions docs/tensor.md
@@ -26,7 +26,7 @@ You can create your tensor from C++ array, or using `vector` in C++ STL. You can
```cpp
#include "tensor.hpp"

// default type is double
// default type is float
Tensor<> your_tensor = { { 1.2, 2.3, 3.4 }, { 4.5, 5.6, 6.7 } }; // shape: (2, 3)

// Or you can create a tensor with a specific type
@@ -36,7 +36,7 @@ Tensor<int> your_int_tensor = { { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } } // shape
Tensor<> transposed_tensor = your_tensor.transpose(); // shape: (3, 2)

// You can also create a tensor from a vector
vector<vector<double>> your_vec = { { 1.2, 2.3, 3.4 }, { 4.5, 5.6, 6.7 } };
vector<vector<float>> your_vec = { { 1.2, 2.3, 3.4 }, { 4.5, 5.6, 6.7 } };
Tensor<> your_tensor_from_vec = Tensor<>(your_vec);
```

Expand Down Expand Up @@ -172,7 +172,7 @@ Tensor<int> A = { { 1, 2, 3 },

Tensor<float> A_float = A.dtype<float>();

Tensor<> A_double = A.dtype<double>(); // since the default type of tensor is double
Tensor<> A_default = A.dtype<float>(); // Tensor<> also works here, since the default tensor type is float
```

## Filter the unwanted elements
@@ -200,7 +200,7 @@ Function mapping also can be applied to the tensor, simply by using `map`. It ta
Tensor<> A = { { 1, 2, 3 },
{ 4, 5, 6 } }; // 2 x 3

Tensor<> A_mapped = A.map([](double x) { return exp(x); });
Tensor<> A_mapped = A.map([](float x) { return exp(x); });
/*
{ { 2.71828, 7.38906, 20.0855 },
{ 54.5982, 148.413, 403.429 } }
@@ -245,12 +245,35 @@ Tensor<size_t> tensor_1d_argmax = tensor_1d.argmin();

## Flatten tensor

You can flatten your tensor using `flatten` function. It returns a 1-D tensor.
You can flatten your tensor using the `flatten` function. It flattens the dimensions of the tensor from `start_dim` to `end_dim` into a single dimension. The defaults for `start_dim` and `end_dim` are 0 and -1, respectively.

```cpp
Tensor<int> A = { { 1, 2, 3 },
{ 4, 5, 6 } }; // 2 x 3

Tensor<int> A_flatten = A.flatten();
// { 1, 2, 3, 4, 5, 6 }
// [ 1, 2, 3, 4, 5, 6 ]

Tensor<> B_3d = { { { -1, -2, -3 },
                    { -4, -5, -6 } },
                  { {  1,  2,  3 },
                    {  4,  5,  6 } } }; // 2 x 2 x 3

Tensor<> B_flatten_12 = B_3d.flatten(0, 1); // flatten the first and second dimensions
/*
[
[-1, -2, -3],
[-4, -5, -6],
[1, 2, 3],
[4, 5, 6]
]
*/

Tensor<> B_flatten_23 = B_3d.flatten(1, 2); // flatten the second and third (last) dimensions
/*
[
[-1, -2, -3, -4, -5, -6],
[1, 2, 3, 4, 5, 6]
]
*/
```
35 changes: 20 additions & 15 deletions examples/main.cpp
@@ -11,10 +11,10 @@ int main()

// Define the hyperparameters

const double LR = 0.01;
const double EPOCH = 10;
const double BATCH_SIZE = 64;
const double DROPOUT_P = 0.3;
const float LR = 0.01;
const float EPOCH = 10;
const float BATCH_SIZE = 64;
const float DROPOUT_P = 0.3;

MNIST dataset(BATCH_SIZE);

@@ -29,7 +29,8 @@ int main()
}

// Initialize the model
MLP model = MLP({784, 128, 64, 10}, DROPOUT_P);
bool bias = true;
MLP model = MLP(784, {128, 64, 10}, bias, DROPOUT_P);

cout << "Finished model initialization" << endl;

@@ -38,14 +39,15 @@ int main()

cout << "Finished loss initialization" << endl;

double loss = 0.0;
double acc = 0.0;
vector<double> loss_list;
vector<double> accuracy_list;
float loss = 0.0;
float acc = 0.0;
vector<float> loss_list;
vector<float> accuracy_list;

cout << "Training started..." << endl;

// // Train the model
// ============================ Training ====================================

// Example of iterating through all batches
for (size_t e = 0; e < EPOCH; e++)
{
@@ -77,16 +79,16 @@ int main()
print_stats_line(i, loss, acc);
}

double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
double total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;
float total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
float total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;

cout << "------------------------------------" << endl;
cout << "Total Loss in Epoch " << e + 1 << " = " << total_loss << "" << endl;
cout << "Total Accuracy in Epoch " << e + 1 << " = " << total_acc << "%" << endl;
cout << "------------------------------------" << endl;
}

// Inference
// ============================ Inference ====================================

model.eval();

@@ -127,8 +129,11 @@ int main()
print_stats_line(i, loss, acc);
}

double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
double total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;
float total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
float total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;

cout << "Average Loss on Test Data = " << total_loss << "" << endl;
cout << "Average Accuracy on Test Data = " << total_acc << "%" << endl;

cout << "------------------------------------" << endl;

4 changes: 2 additions & 2 deletions examples/test_conv2d.cpp
@@ -32,7 +32,7 @@ int main()
Tensor<> test_weight = Tensor<>({out_channels, in_channels, weight_size, weight_size}, 0.0f);
Tensor<> test_bias = Tensor<>({out_channels}, 0.0f);

double val = 0.01;
float val = 0.01;
for (size_t i = 0; i < out_channels; i++)
{
for (size_t j = 0; j < in_channels; j++)
@@ -209,7 +209,7 @@ int main()

output_3 /= 1e6;

double loss = cross_entropy(output_3, labels);
float loss = cross_entropy(output_3, labels);

cout << "Loss: " << loss << endl;

46 changes: 21 additions & 25 deletions examples/test_linear.cpp
@@ -5,34 +5,32 @@
#include "dropout.hpp"
using namespace nn;

int main() {
const bool bias = true;
int main()
{
const bool bias = true;

Linear linear_1(3, 5, bias);
Linear linear_2(5, 7, bias);
Dropout dropout(0.3);
// Dropout dropout(0.3);

Tensor<> specific_weights_1 = {
{0.1, 0.4, 0.7, 1.0, 1.3},
{0.2, 0.5, 0.8, 1.1, 1.4},
{0.3, 0.6, 0.9, 1.2, 1.5}
};

{0.3, 0.6, 0.9, 1.2, 1.5}};

Tensor<> specific_weights_2 = {
{0.1, 0.6, 1.1, 1.6, 2.1, 2.6, 3.1},
{0.2, 0.7, 1.2, 1.7, 2.2, 2.7, 3.2},
{0.3, 0.8, 1.3, 1.8, 2.3, 2.8, 3.3},
{0.4, 0.9, 1.4, 1.9, 2.4, 2.9, 3.4},
{0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5}
};
{0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5}};

Tensor<> specific_bias_1 = {
0.1,
0.2,
0.3,
0.4,
0.5
};
0.5};

Tensor<> specific_bias_2 = {
0.1,
@@ -41,15 +39,13 @@ int main() {
0.4,
0.5,
0.6,
0.7
};
0.7};

Tensor<> input = {
{1.1f, 2.1f, 3.1f},
{4.1f, 5.1f, 6.1f},
{7.1f, 8.1f, 9.1f},
{10.1f, 11.1f, 12.1f}
};
{10.1f, 11.1f, 12.1f}};

cout << "After initialization: " << endl;

@@ -62,17 +58,17 @@ int main() {
cout << "bias 2: " << endl;
specific_bias_2.print();

linear_1.setWeights(specific_weights_1);
linear_2.setWeights(specific_weights_2);
linear_1.set_weight(specific_weights_1);
linear_2.set_weight(specific_weights_2);

linear_1.setBiases(specific_bias_1);
linear_2.setBiases(specific_bias_2);
linear_1.set_bias(specific_bias_1);
linear_2.set_bias(specific_bias_2);

cout << endl;

Tensor<> output_1 = linear_1(input);
Tensor<> output_2 = dropout(output_1);
Tensor<> Y_hat = linear_2(output_2);
// Tensor<> output_2 = dropout(output_1);
Tensor<> Y_hat = linear_2(output_1);

cout << "Y_hat: " << endl;
Y_hat.print();
@@ -83,8 +79,9 @@ int main() {
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0},
{0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0}
};
{0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0}};

Tensor<> Y_not_one_hot = {6, 4, 3, 2};

MSE mse;
CrossEntropyLoss cross_entropy_loss;
@@ -96,9 +93,8 @@ int main() {

Tensor<> dL_dZ = cross_entropy_loss.backward();
Tensor<> dL_dY_dot = linear_2.backward(dL_dZ);
Tensor<> dL_dY = dropout.backward(dL_dY_dot);
Tensor<> dL_dX = linear_1.backward(dL_dY);

// Tensor<> dL_dY = dropout.backward(dL_dY_dot);
Tensor<> dL_dX = linear_1.backward(dL_dY_dot);

// ===================softmax=====================

2 changes: 1 addition & 1 deletion examples/test_softmax.cpp
@@ -69,7 +69,7 @@ int main() {
Tensor<> output_softmax = softmax.forward(output_1);
Tensor<> output_2 = linear_2.forward(output_softmax);

double cross_entropy_loss = criterion.forward(output_2, label);
float cross_entropy_loss = criterion.forward(output_2, label);

cout << "cross entropy loss: " << cross_entropy_loss << endl;
