Skip to content

Commit dbcf4f8

Browse files
authored
Merge pull request #4479 from XiWeiGu/loongarch-opt-axpby
Loongarch opt axpby
2 parents dc802dd + 7bc93d9 commit dbcf4f8

File tree

9 files changed

+2415
-5
lines changed

9 files changed

+2415
-5
lines changed

kernel/loongarch64/KERNEL.LOONGSON2K1000

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ ZAXPYKERNEL = caxpy_lsx.S
5858

5959
SAXPBYKERNEL = axpby_lsx.S
6060
DAXPBYKERNEL = axpby_lsx.S
61+
CAXPBYKERNEL = caxpby_lsx.S
62+
ZAXPBYKERNEL = caxpby_lsx.S
6163

6264
SSUMKERNEL = sum_lsx.S
6365
DSUMKERNEL = sum_lsx.S

kernel/loongarch64/KERNEL.LOONGSON3R5

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ ZAXPYKERNEL = caxpy_lasx.S
5858

5959
SAXPBYKERNEL = axpby_lasx.S
6060
DAXPBYKERNEL = axpby_lasx.S
61+
CAXPBYKERNEL = caxpby_lasx.S
62+
ZAXPBYKERNEL = caxpby_lasx.S
6163

6264
SSUMKERNEL = sum_lasx.S
6365
DSUMKERNEL = sum_lasx.S

kernel/loongarch64/axpby_lasx.S

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5757
PROLOGUE
5858

5959
bge $r0, N, .L999
60-
li.d TEMP, 1
6160
movgr2fr.d a1, $r0
6261
ffint.s.l a1, a1
63-
slli.d TEMP, TEMP, BASE_SHIFT
6462
slli.d INCX, INCX, BASE_SHIFT
6563
slli.d INCY, INCY, BASE_SHIFT
6664
MTG t1, ALPHA
@@ -75,6 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7573
xvreplgr2vr.w VXB, t2
7674
xvreplgr2vr.w VXZ, t3
7775
#endif
76+
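// If (INCX & INCY) == 0, do one by one. This covers incx == 0 || incy == 0,
// but the bitwise test also fires for nonzero strides with no common set bits (e.g. incx = 1, incy = 2).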
77+
and TEMP, INCX, INCY
78+
or I, N, N
79+
beqz TEMP, .L998
80+
81+
li.d TEMP, 1
82+
slli.d TEMP, TEMP, BASE_SHIFT
7883
srai.d I, N, 3
7984
bne INCX, TEMP, .L20
8085
bne INCY, TEMP, .L12 // INCX==1 and INCY!=1

kernel/loongarch64/axpby_lsx.S

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5757
PROLOGUE
5858

5959
bge $r0, N, .L999
60-
li.d TEMP, 1
6160
movgr2fr.d a1, $r0
6261
ffint.s.l a1, a1
63-
slli.d TEMP, TEMP, BASE_SHIFT
6462
slli.d INCX, INCX, BASE_SHIFT
6563
slli.d INCY, INCY, BASE_SHIFT
6664
MTG t1, ALPHA
@@ -75,6 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7573
vreplgr2vr.w VXB, t2
7674
vreplgr2vr.w VXZ, t3
7775
#endif
76+
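// If (INCX & INCY) == 0, do one by one. This covers incx == 0 || incy == 0,
// but the bitwise test also fires for nonzero strides with no common set bits (e.g. incx = 1, incy = 2).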
77+
and TEMP, INCX, INCY
78+
or I, N, N
79+
beqz TEMP, .L998
80+
81+
li.d TEMP, 1
82+
slli.d TEMP, TEMP, BASE_SHIFT
7883
srai.d I, N, 3
7984
bne INCX, TEMP, .L20
8085
bne INCY, TEMP, .L12 // INCX==1 and INCY!=1

0 commit comments

Comments
 (0)