Min/max methods #39

Merged · 3 commits · Aug 2, 2024
.github/workflows/ci.yml (1 addition, 1 deletion)

```diff
@@ -10,7 +10,7 @@ jobs:
     runs-on: ${{ matrix.runner }}
     strategy:
       matrix:
-        runner: [orin, a40]
+        runner: [a40, orin]
     steps:
       - name: checkout code
         uses: actions/checkout@v4
```
CHANGELOG.md (9 additions, 1 deletion)

```diff
@@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+<!-- ---------------------
+v1.1.0
+--------------------- -->
+## v1.1.0 - 03-08-2024
+
+### Added
+
+- Implementation and tests of the methods `.maxAbs()` and `.minAbs()` for any tensor.
+
 <!-- ---------------------
 v1.0.0
 --------------------- -->
@@ -21,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Using a function `numBlocks` instead of the macro `DIM2BLOCKS`
 - Using `TEMPLATE_WITH_TYPE_T` and `TEMPLATE_CONSTRAINT_REQUIRES_FPX` for the code to run on both C++17 and C++20
 
-
 <!-- ---------------------
 v0.1.0
 --------------------- -->
```
include/tensor.cuh (55 additions, 3 deletions)

```diff
@@ -377,6 +377,19 @@ public:
      */
     T sumAbs() const;
 
+    /**
+     * Maximum of the absolute values of all elements.
+     * Equivalent to the inf-norm, max(|x_i|) over all i.
+     * @return max of the absolute values, as the same data type
+     */
+    T maxAbs() const;
+
+    /**
+     * Minimum of the absolute values of all elements, min(|x_i|) over all i.
+     * @return min of the absolute values, as the same data type
+     */
+    T minAbs() const;
+
     /**
      * Solves for the least squares solution of A \ b.
      * A is this tensor and b is the provided tensor.
```
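A minimal usage sketch of the two new methods, assuming the vector-based `DTensor` constructor that the tests below use; the shape and data values here are illustrative, not taken from the PR:

```cpp
#include "tensor.cuh"
#include <iostream>
#include <vector>

int main() {
    // Illustrative data: 24 values spanning [-12, 11] for a 2x3x4 tensor.
    std::vector<float> data(24);
    for (size_t i = 0; i < data.size(); i++) data[i] = static_cast<float>(i) - 12.0f;

    DTensor<float> tenz(data, 2, 3, 4);
    std::cout << "maxAbs = " << tenz.maxAbs() << "\n"; // 12, since |-12| is the largest magnitude
    std::cout << "minAbs = " << tenz.minAbs() << "\n"; // 0, since one element is exactly zero
    return 0;
}
```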
```diff
@@ -405,7 +418,7 @@ public:
 
     DTensor &operator=(const DTensor &other);
 
-    T operator()(size_t i, size_t j = 0, size_t k = 0);
+    T operator()(size_t i, size_t j = 0, size_t k = 0) const;
 
     DTensor &operator*=(T scalar);
 
```
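This hunk const-qualifies single-element access, so reads now compile on `const DTensor` references as well. A tiny sketch of what this enables (the helper below is hypothetical, not part of the library):

```cpp
// Hypothetical helper: compiles only because operator() is now const-qualified.
template<typename T>
T firstElement(const DTensor<T> &t) {
    return t(0); // j and k default to 0; copies a single element device-to-host
}
```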
```diff
@@ -605,7 +618,6 @@ inline float DTensor<float>::normF() const {
     return the_norm;
 }
 
-
 template<>
 inline float DTensor<float>::sumAbs() const {
     float sumAbsAllElements;
@@ -622,6 +634,46 @@ inline double DTensor<double>::sumAbs() const {
     return sumAbsAllElements;
 }
 
+template<>
+inline float DTensor<float>::maxAbs() const {
+    int idx;
+    float hostDst;
+    gpuErrChk(cublasIsamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline double DTensor<double>::maxAbs() const {
+    int idx;
+    double hostDst;
+    gpuErrChk(cublasIdamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline float DTensor<float>::minAbs() const {
+    int idx;
+    float hostDst;
+    gpuErrChk(cublasIsamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline double DTensor<double>::minAbs() const {
+    int idx;
+    double hostDst;
+    gpuErrChk(cublasIdamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
 template<typename T>
 inline bool DTensor<T>::allocateOnDevice(size_t size, bool zero) {
     if (size <= 0) return false;
```
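A note on the cuBLAS calls above: `cublasIsamax`/`cublasIsamin` (and the `d` variants) return the 1-based index, in the Fortran convention, of the element with the largest/smallest absolute value, which explains the `m_d_data + idx - 1` offset. They return an index rather than |x|, so the fetched element can still be negative, hence the `std::signbit` flip. A self-contained sketch of the same pattern outside the class (error checking omitted, illustrative only):

```cpp
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cmath>
#include <cstdio>

int main() {
    float hostData[5] = {1.0f, -9.0f, 3.0f, -2.0f, 0.5f};
    float *devData;
    cudaMalloc(&devData, sizeof(hostData));
    cudaMemcpy(devData, hostData, sizeof(hostData), cudaMemcpyHostToDevice);

    cublasHandle_t handle;
    cublasCreate(&handle);

    int idx; // 1-based index of the element with the largest |x|
    cublasIsamax(handle, 5, devData, 1, &idx);

    float x;
    cudaMemcpy(&x, devData + idx - 1, sizeof(float), cudaMemcpyDeviceToHost);
    printf("maxAbs = %f\n", std::signbit(x) ? -x : x); // prints 9.0

    cublasDestroy(handle);
    cudaFree(devData);
}
```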
```diff
@@ -772,7 +824,7 @@ inline DTensor<double> &DTensor<double>::operator-=(const DTensor<double> &rhs)
 }
 
 template<typename T>
-inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) {
+inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) const {
     T hostDst;
     size_t offset = i + m_numRows * (j + m_numCols * k);
     gpuErrChk(cudaMemcpy(&hostDst, m_d_data + offset, sizeof(T), cudaMemcpyDeviceToHost));
```
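One caveat: every `operator()` call issues a blocking single-element `cudaMemcpy`, which is fine for tests and spot checks but costly inside loops. A sketch of the cheaper bulk pattern, assuming access to the raw device pointer (this free function is illustrative, not part of the library):

```cpp
#include <cuda_runtime.h>
#include <vector>

// Illustrative: one device-to-host transfer for n elements, instead of
// n single-element copies through repeated operator() calls.
std::vector<float> bulkDownload(const float *devPtr, size_t n) {
    std::vector<float> host(n);
    cudaMemcpy(host.data(), devPtr, n * sizeof(float), cudaMemcpyDeviceToHost);
    return host;
}
```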
test/testTensor.cu (35 additions, 1 deletion)

```diff
@@ -352,11 +352,45 @@ void tensorSumAbs() {
     EXPECT_NEAR(112, tenz.sumAbs(), PRECISION_HIGH); // from MATLAB
 }
 
-TEST_F(TensorTest, tensorNormFtensorSumAbs) {
+TEST_F(TensorTest, tensorSumAbs) {
     tensorSumAbs<float>();
     tensorSumAbs<double>();
 }
 
+/* ---------------------------------------
+ * Tensor: max of absolute of all elements
+ * --------------------------------------- */
+
+TEMPLATE_WITH_TYPE_T
+void tensorMax() {
+    std::vector<T> data = TENSOR_DATA_234AMB;
+    DTensor<T> tenz(data, 2, 3, 4);
+    T m = tenz.maxAbs();
+    EXPECT_EQ(27, m);
+}
+
+TEST_F(TensorTest, tensorMax) {
+    tensorMax<float>();
+    tensorMax<double>();
+}
+
+/* ---------------------------------------
+ * Tensor: min of absolute of all elements
+ * --------------------------------------- */
+
+TEMPLATE_WITH_TYPE_T
+void tensorMin() {
+    std::vector<T> data = TENSOR_DATA_234AMB;
+    DTensor<T> tenz(data, 2, 3, 4);
+    T m = tenz.minAbs();
+    EXPECT_EQ(0, m);
+}
+
+TEST_F(TensorTest, tensorMin) {
+    tensorMin<float>();
+    tensorMin<double>();
+}
+
 /* ---------------------------------------
  * Tensor operator() to access element
  * e.g., t(2, 3, 4)
```
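The expected values 27 and 0 come from the shared fixture data `TENSOR_DATA_234AMB`, which is not shown in this diff. For data sets where the extrema are less obvious, a host-side reference follows directly from the definitions; this helper is a sketch for cross-checking, not part of the test suite:

```cpp
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>

// Host-side reference for maxAbs/minAbs: scan |x_i| once, tracking
// the min and max magnitudes. Assumes data is non-empty.
template<typename T>
std::pair<T, T> minMaxAbsReference(const std::vector<T> &data) {
    T mn = std::abs(data[0]), mx = std::abs(data[0]);
    for (const T x : data) {
        mn = std::min(mn, std::abs(x));
        mx = std::max(mx, std::abs(x));
    }
    return {mn, mx};
}
```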