Skip to content

Commit d69f57c

Browse files
authored
Merge pull request #4200 from XiWeiGu/loongarch64_sgemm
LoongArch64: Add sgemm_kernel
2 parents 12ede72 + 553cc13 commit d69f57c

File tree

8 files changed

+4134
-5
lines changed

8 files changed

+4134
-5
lines changed

kernel/loongarch64/KERNEL.LOONGSON3R5

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,24 @@ DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
1111

1212
DGEMVNKERNEL = dgemv_n_8_lasx.S
1313
DGEMVTKERNEL = dgemv_t_8_lasx.S
14+
15+
# Single-precision GEMM: kernel source plus the four packing (copy) sources
# and the object names they are built into. These assignments sit inside a
# conditional block whose opening is outside this hunk (closed by the
# following endif).
# NOTE(review): file names suggest a 16x8 LASX micro-kernel with 16-wide
# "I" copies and 8-wide "O" copies -- confirm against the unroll settings.
SGEMMKERNEL = sgemm_kernel_16x8_lasx.S
16+
SGEMMINCOPY = sgemm_ncopy_16_lasx.S
17+
SGEMMITCOPY = sgemm_tcopy_16_lasx.S
18+
SGEMMONCOPY = sgemm_ncopy_8_lasx.S
19+
SGEMMOTCOPY = sgemm_tcopy_8_lasx.S
20+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
21+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
22+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
23+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
1424
endif
1525

1626
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
1727
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
1828
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
1929
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
30+
31+
# Single-precision TRSM kernels: all four variants (LN, LT, RN, RT) use the
# C sources from ../generic, the same files the DTRSM entries above point to.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
32+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
33+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
34+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

kernel/loongarch64/loongarch64_asm.S

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3636
#define PTR_ST st.d
3737
#define PTR_SLLI slli.d
3838
#define PTR_SRLI srli.d
39+
#define PTR_SRAI srai.d
40+
#define PTR_MUL mul.d
3941
#define PTR_ALSL alsl.d
4042
#else
4143
#define LA_REG int32_t
@@ -48,6 +50,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4850
#define PTR_ST st.w
4951
#define PTR_SLLI slli.w
5052
#define PTR_SRLI srli.w
53+
#define PTR_SRAI srai.w
54+
#define PTR_MUL mul.w
5155
#define PTR_ALSL alsl.w
5256
#endif
5357

@@ -218,6 +222,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
218222
.endif
219223
.endm
220224
//
225+
// GSUB
// Emits "<pre_op>sub.<suf_op> out, in0, in1" for every (out, in0, in1)
// triple in the argument list.
//   pre_op  - mnemonic prefix; not marked :req, so it may be left empty,
//             in which case the emitted mnemonic is plain "sub.<suf_op>".
//   suf_op  - mnemonic suffix placed after "sub.".
//   more    - optional further (out, in0, in1) triples; handled by the
//             macro invoking itself with the same pre_op/suf_op.
226+
//
227+
.macro GSUB pre_op, suf_op:req, out:req, in0:req, in1:req, more:vararg
228+
\pre_op\()sub.\suf_op \out, \in0, \in1
229+
.ifnb \more
230+
GSUB \pre_op, \suf_op, \more
231+
.endif
232+
.endm
233+
//
221234
// GSLLI
222235
//
223236
.macro GSLLI pre_op, suf_op:req, out:req, in0:req, in1:req, more:vararg
@@ -244,6 +257,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
244257
GXOR \pre_op, \suf_op, \more
245258
.endif
246259
.endm
260+
//
261+
// GPERMI
// Emits "<pre_op>permi.<suf_op> out, in0, in1" for every (out, in0, in1)
// triple in the argument list.
//   pre_op  - mnemonic prefix (required).
//   suf_op  - mnemonic suffix placed after "permi." (required).
//   in1     - passed through as the third operand; the callers in this
//             file supply an immediate selector here (e.g. 0x02, 0x31).
//   more    - optional further triples; handled by the macro invoking
//             itself with the same pre_op/suf_op.
262+
//
263+
.macro GPERMI pre_op:req, suf_op:req, out:req, in0:req, in1:req, more:vararg
264+
\pre_op\()permi.\suf_op \out, \in0, \in1
265+
.ifnb \more
266+
GPERMI \pre_op, \suf_op, \more
267+
.endif
268+
.endm
269+
//
270+
// GNMSUB
// Emits "<pre_op>nmsub.<suf_op> out, in0, in1, in2" for every
// (out, in0, in1, in2) quadruple in the argument list.
//   pre_op  - mnemonic prefix (required).
//   suf_op  - mnemonic suffix placed after "nmsub." (required).
//   more    - optional further quadruples; handled by the macro invoking
//             itself with the same pre_op/suf_op.
271+
//
272+
.macro GNMSUB pre_op:req, suf_op:req, out:req, in0:req, in1:req, in2:req, more:vararg
273+
\pre_op\()nmsub.\suf_op \out, \in0, \in1, \in2
274+
.ifnb \more
275+
GNMSUB \pre_op, \suf_op, \more
276+
.endif
277+
.endm
278+
//
279+
// GPRELD
// Emits one "preld in0, in1, in2" per (in0, in1, in2) triple in the
// argument list. Unlike the other G* wrappers it takes no mnemonic
// prefix or suffix; the three arguments are passed to preld unchanged
// and in order.
//   more    - optional further triples; handled by the macro invoking
//             itself.
280+
//
281+
.macro GPRELD in0:req, in1:req, in2:req, more:vararg
282+
preld \in0, \in1, \in2
283+
.ifnb \more
284+
GPRELD \more
285+
.endif
286+
.endm
247287

248288
//
249289
// Compound instructions
@@ -311,3 +351,57 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
311351
GACC \pre_op, \suf_op, \more
312352
.endif
313353
.endm
354+
//
355+
// GMOV
// Register-to-register copy for every (out, in) pair in the argument
// list, emitted as "<pre_op>or.v out, in, in" (OR of the source with
// itself).
//   pre_op  - mnemonic prefix (required); the macros in this file pass xv.
//   more    - optional further (out, in) pairs; handled by the macro
//             invoking itself with the same pre_op. Pairs are expanded
//             left to right, so a later pair sees the result of an
//             earlier one.
356+
//
357+
.macro GMOV pre_op:req, out:req, in:req, more:vararg
358+
\pre_op\()or.v \out, \in, \in
359+
.ifnb \more
360+
GMOV \pre_op, \more
361+
.endif
362+
.endm
363+
364+
//
365+
// Media Related Macros
366+
//
367+
// GSBUTTERFLY: emits the interleave pair
//   <pre_op>ilvl.<suf_op> out0, in0, in1
//   <pre_op>ilvh.<suf_op> out1, in0, in1
// out0 is written before in0/in1 are read by the second instruction, so
// out0 must not name the same register as in0 or in1.
// Takes exactly one set of operands (no vararg continuation).
.macro GSBUTTERFLY pre_op, suf_op, out0, out1, in0, in1
368+
\pre_op\()ilvl.\suf_op \out0, \in0, \in1
369+
\pre_op\()ilvh.\suf_op \out1, \in0, \in1
370+
.endm
371+
// GINTERLACE: emits the pick pair
//   <pre_op>pickev.<suf_op> out0, in0, in1
//   <pre_op>pickod.<suf_op> out1, in0, in1
// out0 is written before in0/in1 are read by the second instruction, so
// out0 must not name the same register as in0 or in1.
// Takes exactly one set of operands (no vararg continuation).
.macro GINTERLACE pre_op, suf_op, out0, out1, in0, in1
372+
\pre_op\()pickev.\suf_op \out0, \in0, \in1
373+
\pre_op\()pickod.\suf_op \out1, \in0, \in1
374+
.endm
375+
376+
//
377+
// GTRANSPOSE4x4_D: Transpose 4x4 block with double-word elements in vectors,
378+
// has no pre_op param. 128-bit vector instructions are not supported.
// Parameter order is inputs first (in0..in3), then outputs (out0..out3),
// then two scratch vectors vt0/vt1, both of which are overwritten.
// All instructions are emitted with the xv prefix.
379+
//
380+
.macro GTRANSPOSE4x4_D in0, in1, in2, in3, out0, out1, out2, out3, \
381+
vt0, vt1
382+
// Interleave double-words of (in0, in1) and of (in2, in3).
GSBUTTERFLY xv, d, \vt0, \out1, \in1, \in0
383+
GSBUTTERFLY xv, d, \vt1, \out3, \in3, \in2
384+
// Three copies, in order: out0 <- vt0, out2 <- vt1, vt1 <- out3.
GMOV xv, \out0, \vt0, \out2, \vt1, \vt1, \out3
385+
// Combine 128-bit halves (selectors 0x02 / 0x31) into the final rows.
GPERMI xv, q, \out0, \out2, 0x02, \out2, \vt0, 0x31, \out3, \out1, 0x31, \out1, \vt1, 0x02
386+
.endm
387+
388+
//
// GTRANSPOSE8x8_W: Transpose 8x8 block with word elements in vectors,
// has no pre_op param; all instructions are emitted with the xv prefix.
// Parameter order is outputs first (out0..out7), then inputs (in0..in7),
// then four scratch vectors tmp0..tmp3 -- note this is the opposite of
// GTRANSPOSE4x4_D, which takes inputs first.
// Register constraints that follow from the statement order below:
//   - tmp0..tmp3 are overwritten.
//   - out0..out3 are written before in4..in7 are read, so they must not
//     name any of in4..in7.
//   - tmp0..tmp3 are written while inputs are still pending, so they must
//     not name any input or output register.
//
.macro GTRANSPOSE8x8_W out0, out1, out2, out3, out4, out5, out6, out7, \
389+
in0, in1, in2, in3, in4, in5, in6, in7, \
390+
tmp0, tmp1, tmp2, tmp3
391+
// Stage 1: two rounds of word interleaves over in0..in3 -> out0..out3.
GSBUTTERFLY xv, w, \tmp0, \tmp2, \in2, \in0
392+
GSBUTTERFLY xv, w, \tmp1, \tmp3, \in3, \in1
393+
GSBUTTERFLY xv, w, \out0, \out1, \tmp1, \tmp0
394+
GSBUTTERFLY xv, w, \out2, \out3, \tmp3, \tmp2
395+
396+
// Stage 2: the same pattern over in4..in7 -> out4..out7.
GSBUTTERFLY xv, w, \tmp0, \tmp2, \in6, \in4
397+
GSBUTTERFLY xv, w, \tmp1, \tmp3, \in7, \in5
398+
GSBUTTERFLY xv, w, \out4, \out5, \tmp1, \tmp0
399+
GSBUTTERFLY xv, w, \out6, \out7, \tmp3, \tmp2
400+
401+
// Keep copies of out0..out3: the first four permutes below overwrite
// them, and the last four still need their original contents.
GMOV xv, \tmp0, \out0, \tmp1, \out1, \tmp2, \out2, \tmp3, \out3
402+
403+
// Stage 3: combine 128-bit halves. Selector 0x02 builds out0..out3 from
// (out0..out3, out4..out7); selector 0x31 builds out4..out7 from
// (out4..out7, saved tmp0..tmp3).
GPERMI xv, q, \out0, \out4, 0x02, \out1, \out5, 0x02, \
404+
\out2, \out6, 0x02, \out3, \out7, 0x02, \
405+
\out4, \tmp0, 0x31, \out5, \tmp1, 0x31, \
406+
\out6, \tmp2, 0x31, \out7, \tmp3, 0x31
407+
.endm

0 commit comments

Comments
 (0)