@@ -80,6 +80,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
80
80
add sp , sp , #( 11 * 16 )
81
81
.endm
82
82
83
// INIT_ZERO
//   Sets all 128 bits of v0-v7 to zero so they can be used as the data
//   source for the zero-fill stores on the beta == 0 path.
//   Clobbers: v0-v7.  Takes no arguments; does not touch flags.
//
//   The registers are cleared with an immediate move rather than by
//   multiplying their current contents by beta: under IEEE-754,
//   NaN * 0 and Inf * 0 are NaN, so a multiply would propagate whatever
//   stale value a register happened to hold into the stored result.
.macro INIT_ZERO
	movi	v0.2d, #0
	movi	v1.2d, #0
	movi	v2.2d, #0
	movi	v3.2d, #0
	movi	v4.2d, #0
	movi	v5.2d, #0
	movi	v6.2d, #0
	movi	v7.2d, #0
.endm
93
+
83
94
/**************************************************************************************
* End of macro definitions
**************************************************************************************/
@@ -97,6 +108,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97
108
cmp N , # 0
98
109
ble .Lgemm_beta_L999
99
110
111
// beta == 0 is dispatched to a dedicated path that only stores to C.
// Note: fcmp reports "equal" for both +0.0 and -0.0.
	fcmp	BETA, #0.0
	b.eq	.Lgemm_beta_zero_01
113
+
100
114
.Lgemm_beta_01:
101
115
102
116
lsl LDC , LDC , # 3
@@ -180,4 +194,59 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
180
194
RESTORE_REGS
181
195
ret
182
196
197
.Lgemm_beta_zero_01:
// beta == 0 path: overwrite C with zeros without reading it.
// Reached only via the fcmp/b.eq dispatch, after the N <= 0 early exit.
//   Outer loop (.._02): N iterations; each starts at C00 and then
//                       advances C00 by LDC bytes.
//   Inner loop (.._03): M / 16 iterations, 16 eight-byte elements each,
//                       written as two 64-byte st1 stores.
//   Tail loop  (.._05): M % 16 iterations, one eight-byte store each.
// Returns 0 in x0.

	INIT_ZERO				// v0-v7 are the data for the st1 stores below
	lsl	LDC, LDC, #3			// LDC *= 8 (stride in 8-byte elements -> bytes)

	.align	5
.Lgemm_beta_zero_02:

	mov	A01, C00			// A01 = write cursor for this pass
	add	C00, C00, LDC			// C00 = start of the next pass

	asr	I, M, #4			// I = M / 16 (arithmetic shift, may be <= 0)
	cmp	I, #0
	ble	.Lgemm_beta_zero_04

	add	A02, A01, #64			// second cursor, 64 bytes ahead of A01

	.align	5
.Lgemm_beta_zero_03:

	// Two 64-byte stores per iteration; both cursors step by calc_size
	// (defined elsewhere -- presumably 128 so the stores tile; confirm).
	st1	{v0.2d, v1.2d, v2.2d, v3.2d}, [A01]
	add	A01, A01, calc_size
	st1	{v4.2d, v5.2d, v6.2d, v7.2d}, [A02]
	add	A02, A02, calc_size

	subs	I, I, #1
	b.ne	.Lgemm_beta_zero_03

	.align	5
.Lgemm_beta_zero_04:

	and	I, M, #15			// I = M % 16, always in 0..15
	cbz	I, .Lgemm_beta_zero_06

	.align	5
.Lgemm_beta_zero_05:

	// Store a literal +0.0 (all-zero bits) instead of the beta register:
	// the dispatch also accepts beta == -0.0, and the tail must match
	// the zeros written by the vector loop.
	str	xzr, [A01], #8			// *A01 = 0; A01 += 8

	subs	I, I, #1
	b.ne	.Lgemm_beta_zero_05

	.align	5
.Lgemm_beta_zero_06:

	subs	N, N, #1
	b.ne	.Lgemm_beta_zero_02

	.align	5
.Lgemm_beta_zero_L999:

	mov	x0, #0				// return value 0
	RESTORE_REGS
	ret
251
+
183
252
EPILOGUE
0 commit comments