Skip to content

Commit 456ee2e

Browse files
authored
Merge pull request #2357 from chenxuqiang/dgemm_beta_zero
kernel/arm64/dgemm_beta.S: add beta == zero branch
2 parents 9998f8e + 52de4cc commit 456ee2e

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

kernel/arm64/dgemm_beta.S

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8080
add sp, sp, #(11*16)
8181
.endm
8282

83+
/*
 * INIT_ZERO: set v0-v7 to all-zero bits, i.e. +0.0 in both double lanes.
 *
 * The registers are loaded with an immediate rather than multiplied by beta:
 * their previous contents are unspecified at this point, and under IEEE 754
 * NaN * 0 and Inf * 0 yield NaN (and negative * 0 yields -0.0), so a multiply
 * could leave non-zero bit patterns that would then be stored into C.
 *
 * Clobbers: v0-v7. Does not read any register and does not touch the flags.
 */
.macro INIT_ZERO
	movi	v0.16b, #0
	movi	v1.16b, #0
	movi	v2.16b, #0
	movi	v3.16b, #0
	movi	v4.16b, #0
	movi	v5.16b, #0
	movi	v6.16b, #0
	movi	v7.16b, #0
.endm
93+
8394
/**************************************************************************************
8495
* End of macro definitions
8596
**************************************************************************************/
@@ -97,6 +108,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97108
cmp N, #0
98109
ble .Lgemm_beta_L999
99110

111+
fcmp BETA, #0.0
112+
beq .Lgemm_beta_zero_01
113+
100114
.Lgemm_beta_01:
101115

102116
lsl LDC, LDC, #3
@@ -180,4 +194,59 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
180194
RESTORE_REGS
181195
ret
182196

197+
/*
 * beta == 0 path (reached from the fcmp/beq dispatch): C is overwritten
 * rather than scaled, so the old contents of C are never loaded.
 *
 * Outer loop (_02) runs N times, advancing C00 by LDC each time.
 * Inner loop (_03) runs M >> 4 times, storing v0-v7 per iteration.
 * Tail loop  (_05) runs M & 15 times, storing beta0 per iteration.
 *
 * NOTE(review): the tail stores beta0 itself, relying on it holding zero on
 * this path; presumably beta0 is the 8-byte scalar copy of BETA - confirm
 * against the register definitions at the top of the file.
 */
.Lgemm_beta_zero_01:
	INIT_ZERO				// v0-v7 are the store sources for loop _03
	lsl	LDC, LDC, #3			// LDC *= 8 (presumably elements -> bytes for doubles)

	.p2align 5
.Lgemm_beta_zero_02:				// ---- one pass per N ----
	mov	A01, C00			// A01 = write cursor for this pass
	add	C00, C00, LDC			// pre-advance C00 for the next pass

	asr	I, M, #4			// I = number of 16-element chunks
	cmp	I, #0
	b.le	.Lgemm_beta_zero_04		// no full chunk: go straight to the tail

	add	A02, A01, #64			// second cursor, 64 bytes ahead of A01

	.p2align 5
.Lgemm_beta_zero_03:				// ---- full chunks ----
	st1	{v0.2d, v1.2d, v2.2d, v3.2d}, [A01]
	add	A01, A01, calc_size		// assumes calc_size == 128 so A01 ends at the tail - TODO confirm
	st1	{v4.2d, v5.2d, v6.2d, v7.2d}, [A02]
	add	A02, A02, calc_size

	subs	I, I, #1
	b.ne	.Lgemm_beta_zero_03

	.p2align 5
.Lgemm_beta_zero_04:
	and	I, M, #15			// I = leftover elements, always in 0..15
	cbz	I, .Lgemm_beta_zero_06		// nothing left over

	.p2align 5
.Lgemm_beta_zero_05:				// ---- tail, one element at a time ----
	str	beta0, [A01]
	add	A01, A01, #8

	subs	I, I, #1
	b.ne	.Lgemm_beta_zero_05

	.p2align 5
.Lgemm_beta_zero_06:
	subs	N, N, #1
	b.ne	.Lgemm_beta_zero_02

	.p2align 5
.Lgemm_beta_zero_L999:
	mov	x0, xzr				// return value 0
	RESTORE_REGS
	ret
251+
183252
EPILOGUE

0 commit comments

Comments
 (0)