From 3480140ac51af2b2d1988c2a925ebf40a2d029dc Mon Sep 17 00:00:00 2001 From: yasahi-hpc Date: Fri, 12 Sep 2025 18:31:50 +0900 Subject: [PATCH 1/2] minor fix in getrf/getrs docs Signed-off-by: yasahi-hpc --- docs/source/API/batched/dense/batched_getrf.rst | 2 +- docs/source/API/batched/dense/batched_getrs.rst | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/source/API/batched/dense/batched_getrf.rst b/docs/source/API/batched/dense/batched_getrf.rst index 3bd1c1e849..05220f971d 100644 --- a/docs/source/API/batched/dense/batched_getrf.rst +++ b/docs/source/API/batched/dense/batched_getrf.rst @@ -13,7 +13,7 @@ Defined in header: :code:`KokkosBatched_Getrf.hpp` invoke(const AViewType &A, const PivViewType &piv); }; -Computes an LU factorization of a general m-by-n matrix :math:`A` using partial pivoting with row interchanges. The factorization has the format +Computes a LU factorization of a general m-by-n matrix :math:`A` using partial pivoting with row interchanges. The factorization has the format :math:`A = P \cdot L \cdot U` where diff --git a/docs/source/API/batched/dense/batched_getrs.rst b/docs/source/API/batched/dense/batched_getrs.rst index 1646d6c682..8646f5e816 100644 --- a/docs/source/API/batched/dense/batched_getrs.rst +++ b/docs/source/API/batched/dense/batched_getrs.rst @@ -13,7 +13,7 @@ Defined in header: :code:`KokkosBatched_Getrs.hpp` invoke(const AViewType &A, const PivViewType &piv, const BViewType &b); }; -Solves a system of the linear equations :math:`A \cdot X = B` or :math:`A^T \cdot X = B` with a general n-by-n matrix :math:`A` using :math:`LU` factorization computed by ``Getrf``. +Solves a system of the linear equations :math:`A \cdot X = B` or :math:`A^T \cdot X = B` or :math:`A^H \cdot X = B` with a general n-by-n matrix :math:`A` using :math:`LU` factorization computed by ``Getrf``. 
This operation is equivalent to the LAPACK routine ``SGETRS`` (``CGETRS``) or ``DGETRS`` (``ZGETRS``) for single or double precision for real (complex) matrix. Parameters @@ -26,6 +26,10 @@ Parameters Type Requirements ----------------- +- ``ArgTrans`` must be one of the following: + - ``KokkosBatched::Trans::NoTranspose`` to solve a system :math:`A \cdot X = B` + - ``KokkosBatched::Trans::Transpose`` to solve a system :math:`A^T \cdot X = B` + - ``KokkosBatched::Trans::ConjTranspose`` to solve a system :math:`A^H \cdot X = B` - ``ArgAlgo`` must be ``KokkosBatched::Algo::Getrs::Unblocked`` for the unblocked algorithm - ``AViewType`` must be a Kokkos `View <https://kokkos.org/kokkos-core-wiki/API/core/view/view.html>`_ of rank 2 containing the general matrix :math:`A` - ``PivViewType`` must be a Kokkos `View <https://kokkos.org/kokkos-core-wiki/API/core/view/view.html>`_ of rank 1 containing the pivot indices From 0ad5b380aacc603932c909889c52734699703266 Mon Sep 17 00:00:00 2001 From: yasahi-hpc Date: Fri, 12 Sep 2025 18:33:22 +0900 Subject: [PATCH 2/2] [Docs] Add batched serial gbtrf/gbtrs Signed-off-by: yasahi-hpc --- docs/source/API/batched/dense-index.rst | 6 ++- .../API/batched/dense/batched_gbtrf.rst | 52 +++++++++++++++++++ .../API/batched/dense/batched_gbtrs.rst | 50 ++++++++++++++++++ 3 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 docs/source/API/batched/dense/batched_gbtrf.rst create mode 100644 docs/source/API/batched/dense/batched_gbtrs.rst diff --git a/docs/source/API/batched/dense-index.rst b/docs/source/API/batched/dense-index.rst index 2c19a00a93..f4d3cd51ed 100644 --- a/docs/source/API/batched/dense-index.rst +++ b/docs/source/API/batched/dense-index.rst @@ -7,6 +7,8 @@ API: Batched Dense (DLA) dense/batched_getrf dense/batched_getrs + dense/batched_gbtrf + dense/batched_gbtrs dense/batched_pbtrf dense/batched_pbtrs dense/batched_pttrf @@ -302,11 +304,11 @@ Below are tables summarizing the currently supported function calls in Kokkos Ke - `TeamGesv` - `TeamVectorGesv` * - gbtrf - - `SerialGbtrf` + - :doc:`SerialGbtrf <dense/batched_gbtrf>` - -- - -- * - gbtrs - - 
`SerialGbtrs` + - :doc:`SerialGbtrs <dense/batched_gbtrs>` - -- - -- * - pbtrf diff --git a/docs/source/API/batched/dense/batched_gbtrf.rst b/docs/source/API/batched/dense/batched_gbtrf.rst new file mode 100644 index 0000000000..787bd36dc3 --- /dev/null +++ b/docs/source/API/batched/dense/batched_gbtrf.rst @@ -0,0 +1,52 @@ +KokkosBatched::Gbtrf +#################### + +Defined in header: :code:`KokkosBatched_Gbtrf.hpp` + +.. code:: c++ + + template <typename ArgAlgo> + struct SerialGbtrf { + template <typename ABViewType, typename PivViewType> + KOKKOS_INLINE_FUNCTION + static int + invoke(const ABViewType &Ab, const PivViewType &piv, const int kl, const int ku, + const int m = -1); + }; + +Computes an LU factorization of a general m-by-n band matrix :math:`A` using partial pivoting with row interchanges. The factorization has the format +:math:`A = P \cdot L \cdot U` + +where + +- :math:`P` is a permutation matrix +- :math:`L` is a lower triangular matrix with unit diagonal elements +- :math:`U` is an upper triangular matrix + +Parameters +========== + +:Ab: On input, :math:`Ab` is an m-by-n general band matrix :math:`A` in band storage. On output, it contains the factors :math:`L` and :math:`U` from the factorization :math:`A = P \cdot L \cdot U`. The unit diagonal elements of :math:`L` are not stored. See `LAPACK reference <https://www.netlib.org/lapack/lug/node124.html>`_ for the band storage format. +:piv: On output, the pivot indices. +:kl: The number of subdiagonals within the band of :math:`A`. kl >= 0 +:ku: The number of superdiagonals within the band of :math:`A`. ku >= 0 +:m: The number of rows of the matrix :math:`A`. (optional, default is -1, corresponding to m == n) + +Type Requirements +----------------- + +- ``ArgAlgo`` must be ``KokkosBatched::Algo::Gbtrf::Unblocked`` for the unblocked algorithm +- ``ABViewType`` must be a Kokkos `View <https://kokkos.org/kokkos-core-wiki/API/core/view/view.html>`_ of rank 2 containing the general band matrix :math:`A` +- ``PivViewType`` must be a Kokkos `View <https://kokkos.org/kokkos-core-wiki/API/core/view/view.html>`_ of rank 1 containing the pivot indices + +Example +======= + +.. 
literalinclude:: ../../../../../example/batched_solve/serial_gbtrs.cpp + :language: c++ + +output: + +.. code:: + + gbtrf/gbtrs works correctly! diff --git a/docs/source/API/batched/dense/batched_gbtrs.rst b/docs/source/API/batched/dense/batched_gbtrs.rst new file mode 100644 index 0000000000..af8c000a88 --- /dev/null +++ b/docs/source/API/batched/dense/batched_gbtrs.rst @@ -0,0 +1,50 @@ +KokkosBatched::Gbtrs +#################### + +Defined in header: :code:`KokkosBatched_Gbtrs.hpp` + +.. code:: c++ + + template <typename ArgTrans, typename ArgAlgo> + struct SerialGbtrs { + template <typename AViewType, typename PivViewType, typename BViewType> + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const PivViewType &piv, const BViewType &b, const int kl, + const int ku); + }; + +Solves a system of linear equations :math:`A \cdot X = B`, :math:`A^T \cdot X = B`, or :math:`A^H \cdot X = B` with a general n-by-n band matrix :math:`A` using the :math:`LU` factorization computed by ``Gbtrf``. +This operation is equivalent to the LAPACK routines ``SGBTRS`` (``CGBTRS``) and ``DGBTRS`` (``ZGBTRS``) for single- and double-precision real (complex) matrices, respectively. + +Parameters +========== + +:A: Input view containing the factors :math:`L` and :math:`U` from the factorization :math:`A = P \cdot L \cdot U` of the band matrix :math:`A` computed by ``Gbtrf``. See `LAPACK reference <https://www.netlib.org/lapack/lug/node124.html>`_ for the band storage format. +:piv: The pivot indices computed by ``Gbtrf``. +:b: Input/output view containing the right-hand side on input and the solution on output. +:kl: The number of subdiagonals within the band of :math:`A`. kl >= 0 +:ku: The number of superdiagonals within the band of :math:`A`. 
ku >= 0 + +Type Requirements +----------------- + +- ``ArgTrans`` must be one of the following: + - ``KokkosBatched::Trans::NoTranspose`` to solve a system :math:`A \cdot X = B` + - ``KokkosBatched::Trans::Transpose`` to solve a system :math:`A^T \cdot X = B` + - ``KokkosBatched::Trans::ConjTranspose`` to solve a system :math:`A^H \cdot X = B` +- ``ArgAlgo`` must be ``KokkosBatched::Algo::Gbtrs::Unblocked`` for the unblocked algorithm +- ``AViewType`` must be a Kokkos `View <https://kokkos.org/kokkos-core-wiki/API/core/view/view.html>`_ of rank 2 containing the general band matrix :math:`A` +- ``PivViewType`` must be a Kokkos `View <https://kokkos.org/kokkos-core-wiki/API/core/view/view.html>`_ of rank 1 containing the pivot indices +- ``BViewType`` must be a Kokkos `View <https://kokkos.org/kokkos-core-wiki/API/core/view/view.html>`_ of rank 1 containing the right-hand side that satisfies + - ``std::is_same_v<typename BViewType::value_type, typename BViewType::non_const_value_type> == true`` + +Example +======= + +.. literalinclude:: ../../../../../example/batched_solve/serial_gbtrs.cpp + :language: c++ + +output: + +.. code:: + + gbtrf/gbtrs works correctly!