Finish Conv2D and Flatten implementations #13


Merged: 36 commits, Feb 20, 2025
Commits
c060c54
fix: each constructor now has to make its shared_ptr individually, whi…
lucaswychan Feb 7, 2025
997b590
refactor: change the weight name
lucaswychan Feb 7, 2025
ca268ac
refactor: remove grad_output
lucaswychan Feb 7, 2025
ba12669
refactor
lucaswychan Feb 7, 2025
387507b
temporarily remove the implementation
lucaswychan Feb 7, 2025
342fb37
add comment to debug
lucaswychan Feb 7, 2025
afaf692
fix: dtype has to construct the shared_ptr
lucaswychan Feb 7, 2025
51c0c85
fix: add default constructor
lucaswychan Feb 7, 2025
564bf25
fix: no dtype conversion in to_tensor to enhance efficiency
lucaswychan Feb 7, 2025
de22280
refactor: change the source file sequence
lucaswychan Feb 7, 2025
84bc534
fix: now kernel size, stride, padding, and dilation are tuples of int …
lucaswychan Feb 7, 2025
55f4fd8
feat: finish conv2d constructor and convolution implementation
lucaswychan Feb 9, 2025
9116290
feat: add conv2d.cpp
lucaswychan Feb 9, 2025
037805e
feat: add debug file for debugging convenience
lucaswychan Feb 9, 2025
3d8151b
feat: add inference
lucaswychan Feb 9, 2025
a6af317
feat: add conv2d playground
lucaswychan Feb 9, 2025
d8753f6
fix: refactor and fix reduce impl
lucaswychan Feb 9, 2025
b7e88e2
refactor: formatting
lucaswychan Feb 9, 2025
244cbcc
fix: remove grad initialization in the constructor
lucaswychan Feb 9, 2025
b194ac4
feat: finish the conv2d implementation
lucaswychan Feb 10, 2025
267b481
refactor: remove extra stride
lucaswychan Feb 10, 2025
190270f
fix: add #pragma once to prevent re-inclusion
lucaswychan Feb 10, 2025
f5f0749
fix: add conv2d_utils dependency
lucaswychan Feb 10, 2025
21da4ca
refactor: add conv2d playground
lucaswychan Feb 12, 2025
6e33f4e
refactor: change from int64_t to size_t (probably will change it back)
lucaswychan Feb 12, 2025
e2b5cd3
fix: fix the scalar mul by using copy constructor
lucaswychan Feb 12, 2025
160be1b
refactor
lucaswychan Feb 12, 2025
cc81046
feat: add parameters initialization
lucaswychan Feb 18, 2025
89cf351
refactor: change the parameter initialization function name
lucaswychan Feb 18, 2025
bcf962f
feat: finish reshape after considering stride and permute
lucaswychan Feb 18, 2025
da3f7f6
feat: add flatten module
lucaswychan Feb 18, 2025
44e4c56
feat: finish the implementation of conv2d
lucaswychan Feb 20, 2025
91a71a8
feat: finish the implementation of flatten
lucaswychan Feb 20, 2025
b266293
add conv2d playground
lucaswychan Feb 20, 2025
f03fa99
add flatten module
lucaswychan Feb 20, 2025
d905ded
fix: fix flatten and filter based on unit_test
lucaswychan Feb 20, 2025
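
A note on commit 84bc534 above: once kernel size, stride, padding, and dilation are per-axis int pairs, each spatial output dimension of Conv2D follows the standard formula out = (in + 2*padding - dilation*(kernel - 1) - 1) / stride + 1, where integer division takes the floor. Below is a minimal standalone C++ sketch of that computation; the helper name conv2d_output_size is made up for illustration and is not code from this PR.

#include <cstddef>
#include <iostream>

// Each spatial output dimension of a 2-D convolution, using the
// standard formula (integer division performs the floor):
// out = (in + 2*pad - dilation*(kernel - 1) - 1) / stride + 1
std::size_t conv2d_output_size(std::size_t in, std::size_t kernel,
                               std::size_t stride, std::size_t pad,
                               std::size_t dilation) {
    return (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1;
}

int main() {
    // A 28x28 MNIST image with a 3x3 kernel, stride 1, no padding,
    // and dilation 1 yields a 26x26 feature map.
    std::cout << conv2d_output_size(28, 3, 1, 0, 1) << std::endl;  // prints 26
    return 0;
}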
9 changes: 6 additions & 3 deletions CMakeLists.txt
@@ -31,16 +31,19 @@ include_directories(
 
 # Add source files
 set(SOURCE_FILES
-    src/core/module.cpp
     src/core/tensor.cpp
-    src/utils/tensor_utils.cpp
+    src/core/module.cpp
     src/modules/layers/linear.cpp
+    src/modules/layers/conv2d.cpp
+    src/modules/layers/flatten.cpp
+    src/utils/conv2d_utils.cpp
     src/modules/layers/dropout.cpp
     src/modules/layers/conv2d.cpp
     src/modules/losses/mse.cpp
     src/modules/activations/relu.cpp
     src/modules/activations/softmax.cpp
     src/modules/losses/cross_entropy.cpp
+    src/utils/tensor_utils.cpp
     src/datasets/mnist.cpp
     src/models/mlp.cpp
     src/metrics/accuracy.cpp
@@ -51,7 +54,7 @@ set(SOURCE_FILES
 add_library(neuralnet ${SOURCE_FILES})
 
 # Add the executable for the main example
-add_executable(main examples/test_tensor.cpp)
+add_executable(main examples/test_conv2d.cpp)
 target_link_libraries(main neuralnet)
 
 # Only build tests if BUILD_TESTS is ON
4 changes: 4 additions & 0 deletions debug.sh
@@ -0,0 +1,4 @@
+cd build/
+cmake -DCMAKE_BUILD_TYPE=Debug ..
+make
+lldb main
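
The new debug.sh is a small convenience wrapper: it configures a Debug build with CMake, compiles, and launches the resulting main binary under lldb. It assumes it is run from the repository root and that the build/ directory already exists.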
72 changes: 63 additions & 9 deletions examples/main.cpp
@@ -6,7 +6,8 @@
 #include "utils.hpp"
 using namespace nn;
 
-int main() {
+int main()
+{
 
     // Define the hyperparameters
 
@@ -21,41 +22,48 @@ int main() {
     const string mnist_label_file = "../data/mnist/train-labels.idx1-ubyte";
 
     // load MNIST data
-    if (!dataset.load_data(mnist_image_file, mnist_label_file)) {
+    if (!dataset.load_data(mnist_image_file, mnist_label_file))
+    {
         cerr << "Failed to load dataset" << endl;
         return 1;
     }
 
     // Initialize the model
     MLP model = MLP({784, 128, 64, 10}, DROPOUT_P);
 
+    cout << "Finished model initialization" << endl;
+
     // Define the loss function
     CrossEntropyLoss criterion = CrossEntropyLoss();
 
+    cout << "Finished loss initialization" << endl;
+
     double loss = 0.0;
     double acc = 0.0;
     vector<double> loss_list;
     vector<double> accuracy_list;
 
+    cout << "Training started..." << endl;
+
     // // Train the model
     // Example of iterating through all batches
-    for (size_t e = 0; e < EPOCH; e++) {
+    for (size_t e = 0; e < EPOCH; e++)
+    {
         cout << "\nEpoch " << e + 1 << ":\n";
-        dataset.reset(); // Reset batch counter at the start of each epoch
+        dataset.reset();  // Reset batch counter at the start of each epoch
         loss_list.clear();
         accuracy_list.clear();
 
-        for (size_t i = 0; i < dataset.get_num_batches(); i++) {
-
+        for (size_t i = 0; i < dataset.get_num_batches(); i++)
+        {
             auto batch = dataset.get_next_batch();
             auto [data, labels] = batch.to_tensor();
 
             // forward propagation
             Tensor<> output = model(data);
 
             loss = criterion(output, labels);
+            // cout << "After loss" << endl;
             acc = metrics::accuracy(output, labels);
+            // cout << "After acc" << endl;
 
             accuracy_list.push_back(acc);
             loss_list.push_back(loss);
@@ -66,7 +74,7 @@ int main() {
             model.update_params(LR);
 
             // print the training stats
-            print_training_stats_line(i, loss, acc);
+            print_stats_line(i, loss, acc);
         }
 
         double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
@@ -78,5 +86,51 @@ int main() {
         cout << "------------------------------------" << endl;
     }
 
+    // Inference
+
+    model.eval();
+
+    const string mnist_image_file_test = "../data/mnist/t10k-images.idx3-ubyte";
+    const string mnist_label_file_test = "../data/mnist/t10k-labels.idx1-ubyte";
+
+    MNIST test_dataset(BATCH_SIZE);
+
+    if (!test_dataset.load_data(mnist_image_file_test, mnist_label_file_test))
+    {
+        cerr << "Failed to load test dataset" << endl;
+        return 1;
+    }
+
+    cout << "\n------------------------------------" << endl;
+    cout << "Testing started..." << endl;
+
+    loss = 0.0;
+    acc = 0.0;
+    loss_list.clear();
+    accuracy_list.clear();
+
+    for (size_t i = 0; i < test_dataset.get_num_batches(); i++)
+    {
+        auto batch = test_dataset.get_next_batch();
+        auto [data, labels] = batch.to_tensor();
+
+        // forward propagation
+        Tensor<> output = model(data);
+
+        loss = criterion(output, labels);
+        acc = metrics::accuracy(output, labels);
+
+        accuracy_list.push_back(acc);
+        loss_list.push_back(loss);
+
+        // print the testing stats
+        print_stats_line(i, loss, acc);
+    }
+
+    double total_loss = accumulate(loss_list.begin(), loss_list.end(), 0.0) / loss_list.size();
+    double total_acc = accumulate(accuracy_list.begin(), accuracy_list.end(), 0.0) / accuracy_list.size() * 100;
+
+    cout << "------------------------------------" << endl;
+
     return 0;
 }
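
For context on the Flatten module finished in this PR: flattening collapses every dimension after the batch axis, turning (N, C, H, W) feature maps into (N, C*H*W) rows that a linear layer can consume; for contiguous row-major data only the shape changes (commit bcf962f handles the strided/permuted case more generally). The sketch below is a standalone illustration under that contiguity assumption; the FlattenSketch struct and its method names are hypothetical, not the repository's Tensor<>-based API.

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for a Flatten layer: (N, C, H, W) -> (N, C*H*W).
// With contiguous row-major data, forward is purely a shape change;
// backward just restores the cached input shape for the gradient.
struct FlattenSketch {
    std::vector<std::size_t> forward(const std::vector<std::size_t>& in_shape) {
        cached_shape_ = in_shape;  // remembered so backward can undo the reshape
        std::size_t flat = 1;
        for (std::size_t i = 1; i < in_shape.size(); ++i) flat *= in_shape[i];
        return {in_shape[0], flat};
    }
    std::vector<std::size_t> backward() const { return cached_shape_; }

private:
    std::vector<std::size_t> cached_shape_;
};

int main() {
    FlattenSketch f;
    auto out = f.forward({32, 8, 26, 26});                // batch of 32 feature maps
    std::cout << out[0] << " x " << out[1] << std::endl;  // prints 32 x 5408
    return 0;
}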