Min/max methods #39

Merged · 3 commits · Aug 2, 2024
.github/workflows/ci.yml (1 addition, 1 deletion)

```diff
@@ -10,7 +10,7 @@ jobs:
     runs-on: ${{ matrix.runner }}
     strategy:
       matrix:
-        runner: [orin, a40]
+        runner: [a40, orin]
     steps:
       - name: checkout code
         uses: actions/checkout@v4
```
CHANGELOG.md (9 additions, 1 deletion)

```diff
@@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+<!-- ---------------------
+v1.1.0
+--------------------- -->
+## v1.1.0 - 03-08-2024
+
+### Added
+
+- Implementation and tests of the methods `.maxAbs()` and `.minAbs()` for any tensor.
+
 <!-- ---------------------
 v1.0.0
 --------------------- -->
@@ -21,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Using a function `numBlocks` instead of the macro `DIM2BLOCKS`
 - Using `TEMPLATE_WITH_TYPE_T` and `TEMPLATE_CONSTRAINT_REQUIRES_FPX` for the code to run on both C++17 and C++20
 
-
 <!-- ---------------------
 v0.1.0
 --------------------- -->
```
include/tensor.cuh (55 additions, 3 deletions)

```diff
@@ -377,6 +377,19 @@ public:
      */
     T sumAbs() const;
 
+    /**
+     * Maximum of the absolute values of all elements.
+     * Equivalent to the inf-norm, max(|x_i|) over all i.
+     * @return max of the absolute values, as the same data type
+     */
+    T maxAbs() const;
+
+    /**
+     * Minimum of the absolute values of all elements, min(|x_i|) over all i.
+     * @return min of the absolute values, as the same data type
+     */
+    T minAbs() const;
+
     /**
      * Solves for the least squares solution of A \ b.
      * A is this tensor and b is the provided tensor.
```
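A minimal usage sketch of the two new methods, assuming the vector-based `DTensor` constructor that the tests below use; the shape and data values here are illustrative, not taken from the PR:

```cpp
#include "tensor.cuh"
#include <iostream>
#include <vector>

int main() {
    // Illustrative data: 24 values spanning [-12, 11] for a 2x3x4 tensor.
    std::vector<float> data(24);
    for (size_t i = 0; i < data.size(); i++) data[i] = static_cast<float>(i) - 12.0f;

    DTensor<float> tenz(data, 2, 3, 4);
    std::cout << "maxAbs = " << tenz.maxAbs() << "\n"; // 12, since |-12| is the largest magnitude
    std::cout << "minAbs = " << tenz.minAbs() << "\n"; // 0, since one element is exactly zero
    return 0;
}
```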
```diff
@@ -405,7 +418,7 @@ public:
 
     DTensor &operator=(const DTensor &other);
 
-    T operator()(size_t i, size_t j = 0, size_t k = 0);
+    T operator()(size_t i, size_t j = 0, size_t k = 0) const;
 
     DTensor &operator*=(T scalar);
 
```
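This hunk const-qualifies single-element access, so reads now compile on `const DTensor` references as well. A tiny sketch of what this enables (the helper below is hypothetical, not part of the library):

```cpp
// Hypothetical helper: compiles only because operator() is now const-qualified.
template<typename T>
T firstElement(const DTensor<T> &t) {
    return t(0); // j and k default to 0; copies a single element device-to-host
}
```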
```diff
@@ -605,7 +618,6 @@ inline float DTensor<float>::normF() const {
     return the_norm;
 }
 
-
 template<>
 inline float DTensor<float>::sumAbs() const {
     float sumAbsAllElements;
@@ -622,6 +634,46 @@ inline double DTensor<double>::sumAbs() const {
     return sumAbsAllElements;
 }
 
+template<>
+inline float DTensor<float>::maxAbs() const {
+    int idx;
+    float hostDst;
+    gpuErrChk(cublasIsamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline double DTensor<double>::maxAbs() const {
+    int idx;
+    double hostDst;
+    gpuErrChk(cublasIdamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline float DTensor<float>::minAbs() const {
+    int idx;
+    float hostDst;
+    gpuErrChk(cublasIsamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline double DTensor<double>::minAbs() const {
+    int idx;
+    double hostDst;
+    gpuErrChk(cublasIdamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
 template<typename T>
 inline bool DTensor<T>::allocateOnDevice(size_t size, bool zero) {
     if (size <= 0) return false;
```
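A note on the cuBLAS calls above: `cublasIsamax`/`cublasIsamin` (and the `d` variants) return the 1-based index, in the Fortran convention, of the element with the largest/smallest absolute value, which explains the `m_d_data + idx - 1` offset. They return an index rather than |x|, so the fetched element can still be negative, hence the `std::signbit` flip. A self-contained sketch of the same pattern outside the class (error checking omitted, illustrative only):

```cpp
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cmath>
#include <cstdio>

int main() {
    float hostData[5] = {1.0f, -9.0f, 3.0f, -2.0f, 0.5f};
    float *devData;
    cudaMalloc(&devData, sizeof(hostData));
    cudaMemcpy(devData, hostData, sizeof(hostData), cudaMemcpyHostToDevice);

    cublasHandle_t handle;
    cublasCreate(&handle);

    int idx; // 1-based index of the element with the largest |x|
    cublasIsamax(handle, 5, devData, 1, &idx);

    float x;
    cudaMemcpy(&x, devData + idx - 1, sizeof(float), cudaMemcpyDeviceToHost);
    printf("maxAbs = %f\n", std::signbit(x) ? -x : x); // prints 9.0

    cublasDestroy(handle);
    cudaFree(devData);
}
```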
```diff
@@ -772,7 +824,7 @@ inline DTensor<double> &DTensor<double>::operator-=(const DTensor<double> &rhs)
 }
 
 template<typename T>
-inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) {
+inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) const {
     T hostDst;
     size_t offset = i + m_numRows * (j + m_numCols * k);
     gpuErrChk(cudaMemcpy(&hostDst, m_d_data + offset, sizeof(T), cudaMemcpyDeviceToHost));
```
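One caveat: every `operator()` call issues a blocking single-element `cudaMemcpy`, which is fine for tests and spot checks but costly inside loops. A sketch of the cheaper bulk pattern, assuming access to the raw device pointer (this free function is illustrative, not part of the library):

```cpp
#include <cuda_runtime.h>
#include <vector>

// Illustrative: one device-to-host transfer for n elements, instead of
// n single-element copies through repeated operator() calls.
std::vector<float> bulkDownload(const float *devPtr, size_t n) {
    std::vector<float> host(n);
    cudaMemcpy(host.data(), devPtr, n * sizeof(float), cudaMemcpyDeviceToHost);
    return host;
}
```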
test/testTensor.cu (35 additions, 1 deletion)

```diff
@@ -352,11 +352,45 @@ void tensorSumAbs() {
     EXPECT_NEAR(112, tenz.sumAbs(), PRECISION_HIGH); // from MATLAB
 }
 
-TEST_F(TensorTest, tensorNormFtensorSumAbs) {
+TEST_F(TensorTest, tensorSumAbs) {
     tensorSumAbs<float>();
     tensorSumAbs<double>();
 }
 
+/* ---------------------------------------
+ * Tensor: max of absolute of all elements
+ * --------------------------------------- */
+
+TEMPLATE_WITH_TYPE_T
+void tensorMax() {
+    std::vector<T> data = TENSOR_DATA_234AMB;
+    DTensor<T> tenz(data, 2, 3, 4);
+    T m = tenz.maxAbs();
+    EXPECT_EQ(27, m);
+}
+
+TEST_F(TensorTest, tensorMax) {
+    tensorMax<float>();
+    tensorMax<double>();
+}
+
+/* ---------------------------------------
+ * Tensor: min of absolute of all elements
+ * --------------------------------------- */
+
+TEMPLATE_WITH_TYPE_T
+void tensorMin() {
+    std::vector<T> data = TENSOR_DATA_234AMB;
+    DTensor<T> tenz(data, 2, 3, 4);
+    T m = tenz.minAbs();
+    EXPECT_EQ(0, m);
+}
+
+TEST_F(TensorTest, tensorMin) {
+    tensorMin<float>();
+    tensorMin<double>();
+}
+
 /* ---------------------------------------
  * Tensor operator() to access element
  * e.g., t(2, 3, 4)
```
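The expected values 27 and 0 come from the shared fixture data `TENSOR_DATA_234AMB`, which is not shown in this diff. For data sets where the extrema are less obvious, a host-side reference follows directly from the definitions; this helper is a sketch for cross-checking, not part of the test suite:

```cpp
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>

// Host-side reference for maxAbs/minAbs: scan |x_i| once, tracking
// the min and max magnitudes. Assumes data is non-empty.
template<typename T>
std::pair<T, T> minMaxAbsReference(const std::vector<T> &data) {
    T mn = std::abs(data[0]), mx = std::abs(data[0]);
    for (const T x : data) {
        mn = std::min(mn, std::abs(x));
        mx = std::max(mx, std::abs(x));
    }
    return {mn, mx};
}
```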