Skip to content

Commit 4d21365

Browse files
committed
kernel/riscv64:Added support for omatcopy on riscv64.
1 parent 9a7e3f1 commit 4d21365

File tree

3 files changed

+242
-0
lines changed

3 files changed

+242
-0
lines changed

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,9 @@ endif
201201
ifndef ZGEMM_BETA
202202
ZGEMM_BETA = ../generic/zgemm_beta.c
203203
endif
204+
205+
# RVV omatcopy kernels. ZOMATCOPY_CN and COMATCOPY_CN share one source file,
# as do DOMATCOPY_CN and SOMATCOPY_CN; each source picks 32- or 64-bit
# elements at compile time from the DOUBLE macro.
ZOMATCOPY_CN = zomatcopy_cn_vector.c
206+
COMATCOPY_CN = zomatcopy_cn_vector.c
207+
208+
DOMATCOPY_CN = omatcopy_cn_vector.c
209+
SOMATCOPY_CN = omatcopy_cn_vector.c

kernel/riscv64/omatcopy_cn_vector.c

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
31+
/*
 * Precision-dependent aliases for the RVV intrinsics used by this kernel.
 * Every alias operates on an LMUL=4 register group; DOUBLE selects 64-bit
 * elements, otherwise 32-bit elements are used.
 */
#if defined(DOUBLE)
#define VSETVL_MAX      RISCV_RVV(vsetvlmax_e64m4)()
#define VSETVL(n)       RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T       vfloat64m4_t
#define VLEV_FLOAT      RISCV_RVV(vle64_v_f64m4)
#define VSEV_FLOAT      RISCV_RVV(vse64_v_f64m4)
#define VFMULVF_FLOAT   RISCV_RVV(vfmul_vf_f64m4)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f64m4)
#else
#define VSETVL_MAX      RISCV_RVV(vsetvlmax_e32m4)()
#define VSETVL(n)       RISCV_RVV(vsetvl_e32m4)(n)
#define FLOAT_V_T       vfloat32m4_t
#define VLEV_FLOAT      RISCV_RVV(vle32_v_f32m4)
#define VSEV_FLOAT      RISCV_RVV(vse32_v_f32m4)
#define VFMULVF_FLOAT   RISCV_RVV(vfmul_vf_f32m4)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f32m4)
#endif
48+
49+
50+
/*
 * Out-of-place scaled copy of a matrix, processed one column at a time:
 *
 *     b[i * ldb + j] = alpha * a[i * lda + j]    0 <= i < cols, 0 <= j < rows
 *
 * rows, cols  extent of the matrix; nothing is written if either is <= 0
 * alpha       scale factor
 * a, lda      source matrix and the element distance between its columns
 * b, ldb      destination matrix and the element distance between its columns
 *
 * Two values of alpha take shortcuts:
 *   alpha == 0  b is filled with zeros and a is never read
 *   alpha == 1  a is copied to b without a multiply
 *
 * Always returns 0.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
    BLASLONG i, j;
    FLOAT *aptr, *bptr;
    size_t vl;
    FLOAT_V_T va, va1;

    if ( rows <= 0 ) return(0);
    if ( cols <= 0 ) return(0);

    aptr = a;
    bptr = b;

    if ( alpha == 0.0 )
    {
        /* Build the zero vector once at the maximum length; the stores
           below only write the first vl lanes of it. */
        vl = VSETVL_MAX;
        va = VFMVVF_FLOAT(0, vl);
        for ( i = 0; i < cols; i++ )
        {
            for ( j = 0; j < rows; j += vl )
            {
                vl = VSETVL(rows - j);
                VSEV_FLOAT(bptr + j, va, vl);
            }
            bptr += ldb;
        }
        return(0);
    }

    if ( alpha == 1.0 )
    {
        for ( i = 0; i < cols; i++ )
        {
            for ( j = 0; j < rows; j += vl )
            {
                vl = VSETVL(rows - j);
                va = VLEV_FLOAT(aptr + j, vl);
                VSEV_FLOAT(bptr + j, va, vl);
            }
            aptr += lda;
            bptr += ldb;
        }
        return(0);
    }

    /* General alpha: peel one column when cols is odd so that the main
       loop can always handle columns in pairs. */
    i = 0;
    if ( cols % 2 )
    {
        for ( j = 0; j < rows; j += vl )
        {
            vl = VSETVL(rows - j);
            va = VLEV_FLOAT(aptr + j, vl);
            va = VFMULVF_FLOAT(va, alpha, vl);
            VSEV_FLOAT(bptr + j, va, vl);
        }
        aptr += lda;
        bptr += ldb;
        i = 1;
    }

    for ( ; i < cols; i += 2 )
    {
        for ( j = 0; j < rows; j += vl )
        {
            vl = VSETVL(rows - j);
            va  = VLEV_FLOAT(aptr + j, vl);
            va1 = VLEV_FLOAT(aptr + lda + j, vl);
            va  = VFMULVF_FLOAT(va, alpha, vl);
            va1 = VFMULVF_FLOAT(va1, alpha, vl);
            VSEV_FLOAT(bptr + j, va, vl);
            VSEV_FLOAT(bptr + ldb + j, va1, vl);
        }
        aptr += 2 * lda;
        bptr += 2 * ldb;
    }

    return(0);
}

kernel/riscv64/zomatcopy_cn_vector.c

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
31+
/*
 * Precision-dependent aliases for the RVV intrinsics used by this kernel.
 * All vectors are LMUL=4 register groups; FLOAT_VX2_T is the two-field tuple
 * produced and consumed by the segment load/store. DOUBLE selects 64-bit
 * elements, otherwise 32-bit elements are used.
 */
#if !defined(DOUBLE)
#define VSETVL(n)         RISCV_RVV(vsetvl_e32m4)(n)
#define FLOAT_V_T         vfloat32m4_t
#define FLOAT_VX2_T       vfloat32m4x2_t
#define VLSEG2_FLOAT      RISCV_RVV(vlseg2e32_v_f32m4x2)
#define VSSEG2_FLOAT      RISCV_RVV(vsseg2e32_v_f32m4x2)
#define VGET_VX2          RISCV_RVV(vget_v_f32m4x2_f32m4)
#define VSET_VX2          RISCV_RVV(vset_v_f32m4_f32m4x2)
#define VFMUL_VF_FLOAT    RISCV_RVV(vfmul_vf_f32m4)
#define VFMACCVF_FLOAT    RISCV_RVV(vfmacc_vf_f32m4)
#define VFNMSACVF_FLOAT   RISCV_RVV(vfnmsac_vf_f32m4)
#else
#define VSETVL(n)         RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T         vfloat64m4_t
#define FLOAT_VX2_T       vfloat64m4x2_t
#define VLSEG2_FLOAT      RISCV_RVV(vlseg2e64_v_f64m4x2)
#define VSSEG2_FLOAT      RISCV_RVV(vsseg2e64_v_f64m4x2)
#define VGET_VX2          RISCV_RVV(vget_v_f64m4x2_f64m4)
#define VSET_VX2          RISCV_RVV(vset_v_f64m4_f64m4x2)
#define VFMUL_VF_FLOAT    RISCV_RVV(vfmul_vf_f64m4)
#define VFMACCVF_FLOAT    RISCV_RVV(vfmacc_vf_f64m4)
#define VFNMSACVF_FLOAT   RISCV_RVV(vfnmsac_vf_f64m4)
#endif
64+
65+
/*
 * Out-of-place scaled copy of a complex matrix, processed one column at a
 * time. Each element is an interleaved (real, imaginary) pair of FLOATs and
 * is multiplied by the complex scalar (alpha_r, alpha_i):
 *
 *     b_re = alpha_r * a_re - alpha_i * a_im
 *     b_im = alpha_r * a_im + alpha_i * a_re
 *
 * rows, cols        extent of the matrix; nothing is written if either is <= 0
 * alpha_r, alpha_i  real and imaginary parts of the scale factor
 * a, lda            source matrix and the distance between its columns,
 *                   counted in complex elements
 * b, ldb            destination matrix and the distance between its columns,
 *                   counted in complex elements
 *
 * Always returns 0.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
    BLASLONG i, j;
    FLOAT *aptr, *bptr;
    FLOAT_V_T a_re, a_im, b_re, b_im;
    FLOAT_VX2_T vseg;
    size_t gvl = 0;

    if ( rows <= 0 ) return(0);
    if ( cols <= 0 ) return(0);

    aptr = a;
    bptr = b;

    /* Convert the leading dimensions from complex elements to FLOATs. */
    lda *= 2;
    ldb *= 2;

    for ( i = 0; i < cols; i++ )
    {
        for ( j = 0; j < rows; j += gvl )
        {
            gvl = VSETVL(rows - j);

            /* The segment load splits gvl (re, im) pairs into two vectors.
               Complex element j starts at FLOAT offset 2 * j. */
            vseg = VLSEG2_FLOAT(aptr + 2 * j, gvl);
            a_re = VGET_VX2(vseg, 0);
            a_im = VGET_VX2(vseg, 1);

            /* b_re = alpha_r * a_re - alpha_i * a_im */
            b_re = VFMUL_VF_FLOAT(a_re, alpha_r, gvl);
            b_re = VFNMSACVF_FLOAT(b_re, alpha_i, a_im, gvl);

            /* b_im = alpha_r * a_im + alpha_i * a_re */
            b_im = VFMUL_VF_FLOAT(a_im, alpha_r, gvl);
            b_im = VFMACCVF_FLOAT(b_im, alpha_i, a_re, gvl);

            /* Reuse the loaded tuple as the store container so that no
               uninitialised tuple value is ever read by VSET_VX2. */
            vseg = VSET_VX2(vseg, 0, b_re);
            vseg = VSET_VX2(vseg, 1, b_im);
            VSSEG2_FLOAT(bptr + 2 * j, vseg, gvl);
        }
        aptr += lda;
        bptr += ldb;
    }

    return(0);
}

0 commit comments

Comments
 (0)