Skip to content

Commit 995768b

Browse files
authored
Merge pull request #2351 from Zeyiii/develop
prefetching for dgemm_beta
2 parents (96ad579 + d1b5380), commit 995768b

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

kernel/arm64/dgemm_beta.S

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4343
#define betaV0 v11.d[0]
4444
#define I x16
4545

46-
#define size 128
46+
#define prfm_size 640
47+
#define calc_size 128
4748

4849
/**************************************************************************************
4950
* Macro definitions
@@ -119,27 +120,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
119120
ldp q2, q3, [A02]
120121
ldp q4, q5, [A03]
121122
ldp q6, q7, [A04]
122-
123+
123124
fmul v0.2d, v0.2d, betaV0
124125
fmul v1.2d, v1.2d, betaV0
125-
126+
126127
fmul v2.2d, v2.2d, betaV0
127128
fmul v3.2d, v3.2d, betaV0
128-
129+
130+
prfm PLDL1KEEP, [A01, prfm_size]
131+
129132
fmul v4.2d, v4.2d, betaV0
130133
fmul v5.2d, v5.2d, betaV0
131-
134+
135+
prfm PLDL1KEEP, [A03, prfm_size]
136+
132137
fmul v6.2d, v6.2d, betaV0
133138
fmul v7.2d, v7.2d, betaV0
134139

135140
st1 {v0.2d, v1.2d}, [A01]
136-
add A01, A01, size
141+
add A01, A01, calc_size
137142
st1 {v2.2d, v3.2d}, [A02]
138-
add A02, A02, size
143+
add A02, A02, calc_size
139144
st1 {v4.2d, v5.2d}, [A03]
140-
add A03, A03, size
145+
add A03, A03, calc_size
141146
st1 {v6.2d, v7.2d}, [A04]
142-
add A04, A04, size
147+
add A04, A04, calc_size
143148

144149
subs I , I , #1
145150
bne .Lgemm_beta_03

0 commit comments

Comments (0)