Skip to content

Commit 229d8a0

Browse files
authored
Merge pull request #4959 from CDAC-Bengaluru/level-1-sve
SVE Implementation for Level-1 BLAS Routines
2 parents 89f02ed + 3368a4e commit 229d8a0

File tree

8 files changed

+298
-4
lines changed

8 files changed

+298
-4
lines changed

CONTRIBUTORS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,6 @@ In chronological order:
229229

230230
* Christopher Daley <https://github.com/cdaley>
231231
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems
232+
233+
* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
234+
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE

kernel/arm64/KERNEL.ARMV8SVE

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ DAXPYKERNEL = daxpy_thunderx2t99.S
6464
CAXPYKERNEL = zaxpy.S
6565
ZAXPYKERNEL = zaxpy.S
6666

67-
SROTKERNEL = rot.S
68-
DROTKERNEL = rot.S
67+
SROTKERNEL = rot.c
68+
DROTKERNEL = rot.c
6969
CROTKERNEL = zrot.S
7070
ZROTKERNEL = zrot.S
7171

@@ -94,8 +94,8 @@ DCOPYKERNEL = copy_thunderx2t99.c
9494
CCOPYKERNEL = copy_thunderx2t99.c
9595
ZCOPYKERNEL = copy_thunderx2t99.c
9696

97-
SSWAPKERNEL = swap_thunderx2t99.S
98-
DSWAPKERNEL = swap_thunderx2t99.S
97+
SSWAPKERNEL = swap.c
98+
DSWAPKERNEL = swap.c
9999
CSWAPKERNEL = swap_thunderx2t99.S
100100
ZSWAPKERNEL = swap_thunderx2t99.S
101101

kernel/arm64/rot.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*******************************************************************************
2+
Copyright (c) 2015, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*******************************************************************************/
27+
#include "common.h"
28+
#include "rot_kernel_sve.c"
29+
#include "rot_kernel_c.c"
30+
31+
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
32+
{
33+
if (n <= 0)
34+
return (0);
35+
if (inc_x == 1 && inc_y == 1)
36+
rot_kernel_sve(n, x, y, c, s);
37+
else
38+
rot_kernel_c(n, x, inc_x, y, inc_y, c, s);
39+
return (0);
40+
}

kernel/arm64/rot_kernel_c.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*******************************************************************************
2+
Copyright (c) 2015, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*******************************************************************************/
27+
#include "common.h"
28+
29+
static int rot_kernel_c(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
30+
{
31+
BLASLONG i = 0;
32+
BLASLONG ix = 0, iy = 0;
33+
FLOAT temp;
34+
while (i < n)
35+
{
36+
temp = c * x[ix] + s * y[iy];
37+
y[iy] = c * y[iy] - s * x[ix];
38+
x[ix] = temp;
39+
ix += inc_x;
40+
iy += inc_y;
41+
i++;
42+
}
43+
return (0);
44+
}

kernel/arm64/rot_kernel_sve.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*******************************************************************************
2+
Copyright (c) 2015, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*******************************************************************************/
27+
#include "common.h"
28+
#include <arm_sve.h>
29+
30+
#ifdef DOUBLE
31+
#define SVE_TYPE svfloat64_t
32+
#define SVE_ZERO svdup_f64(0.0)
33+
#define SVE_WHILELT svwhilelt_b64
34+
#define SVE_ALL svptrue_b64()
35+
#define SVE_WIDTH svcntd()
36+
#else
37+
#define SVE_TYPE svfloat32_t
38+
#define SVE_ZERO svdup_f32(0.0)
39+
#define SVE_WHILELT svwhilelt_b32
40+
#define SVE_ALL svptrue_b32()
41+
#define SVE_WIDTH svcntw()
42+
#endif
43+
44+
static int rot_kernel_sve(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s)
45+
{
46+
for (BLASLONG i = 0; i < n; i += SVE_WIDTH)
47+
{
48+
svbool_t pg = SVE_WHILELT((uint64_t)i, (uint64_t)n);
49+
SVE_TYPE x_vec = svld1(pg, &x[i]);
50+
SVE_TYPE y_vec = svld1(pg, &y[i]);
51+
SVE_TYPE cx_vec = svmul_z(pg, x_vec, c);
52+
SVE_TYPE sy_vec = svmul_z(pg, y_vec, s);
53+
SVE_TYPE sx_vec = svmul_z(pg, x_vec, s);
54+
SVE_TYPE cy_vec = svmul_z(pg, y_vec, c);
55+
svst1(pg, &x[i], svadd_z(pg, cx_vec, sy_vec));
56+
svst1(pg, &y[i], svsub_z(pg, cy_vec, sx_vec));
57+
}
58+
return (0);
59+
}

kernel/arm64/swap.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
#include "common.h"
28+
#include "swap_kernel_sve.c"
29+
#include "swap_kernel_c.c"
30+
31+
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
32+
{
33+
if (n <= 0)
34+
return 0;
35+
if (inc_x == 1 && inc_y == 1)
36+
swap_kernel_sve(n, x, y);
37+
else
38+
swap_kernel_c(n, x, inc_x, y, inc_y);
39+
return (0);
40+
}

kernel/arm64/swap_kernel_c.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
#include "common.h"
28+
#include <stdio.h>
29+
30+
static int swap_kernel_c(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
31+
{
32+
BLASLONG i = 0;
33+
BLASLONG ix = 0, iy = 0;
34+
FLOAT temp;
35+
36+
while (i < n)
37+
{
38+
temp = x[ix];
39+
x[ix] = y[iy];
40+
y[iy] = temp;
41+
ix += inc_x;
42+
iy += inc_y;
43+
i++;
44+
}
45+
return (0);
46+
}

kernel/arm64/swap_kernel_sve.c

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*******************************************************************************
2+
Copyright (c) 2015, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*******************************************************************************/
27+
#include "common.h"
28+
#include <arm_sve.h>
29+
30+
#ifdef DOUBLE
31+
#define SVE_TYPE svfloat64_t
32+
#define SVE_ZERO svdup_f64(0.0)
33+
#define SVE_WHILELT svwhilelt_b64
34+
#define SVE_ALL svptrue_b64()
35+
#define SVE_WIDTH svcntd()
36+
#else
37+
#define SVE_TYPE svfloat32_t
38+
#define SVE_ZERO svdup_f32(0.0)
39+
#define SVE_WHILELT svwhilelt_b32
40+
#define SVE_ALL svptrue_b32()
41+
#define SVE_WIDTH svcntw()
42+
#endif
43+
44+
static int swap_kernel_sve(BLASLONG n, FLOAT *x, FLOAT *y)
45+
{
46+
BLASLONG sve_width = SVE_WIDTH;
47+
48+
for (BLASLONG i = 0; i < n; i += sve_width * 2)
49+
{
50+
svbool_t pg_a = SVE_WHILELT((uint64_t)i, (uint64_t)n);
51+
svbool_t pg_b = SVE_WHILELT((uint64_t)(i + sve_width), (uint64_t)n);
52+
SVE_TYPE x_vec_a = svld1(pg_a, &x[i]);
53+
SVE_TYPE y_vec_a = svld1(pg_a, &y[i]);
54+
SVE_TYPE x_vec_b = svld1(pg_b, &x[i + sve_width]);
55+
SVE_TYPE y_vec_b = svld1(pg_b, &y[i + sve_width]);
56+
svst1(pg_a, &x[i], y_vec_a);
57+
svst1(pg_a, &y[i], x_vec_a);
58+
svst1(pg_b, &x[i + sve_width], y_vec_b);
59+
svst1(pg_b, &y[i + sve_width], x_vec_b);
60+
}
61+
return (0);
62+
}

0 commit comments

Comments
 (0)