Skip to content

Commit 11fa0de

Browse files
committed
cmd/compile: use OpMove instead of memmove more on arm64
OpMove is faster than a call to memmove for small moves of fixed size.

For safety, we have to rewrite the Move rewrite rules a bit so that all the loads are done before any stores happen. This matters because small moves are now inlined without first proving that the source and destination are disjoint.

Also use an 8-byte move instead of a 16-byte move if the tail is at most 8 bytes.

Change-Id: I7f6c7496ac6d5eb2e0706fd59ca4b5d797c51101
Reviewed-on: https://go-review.googlesource.com/c/go/+/672997
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
1 parent fc641e7 commit 11fa0de

File tree

3 files changed

+160
-28
lines changed

3 files changed

+160
-28
lines changed

src/cmd/compile/internal/ssa/_gen/ARM64.rules

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -472,26 +472,39 @@
472472
(MOVDstore dst (MOVDload src mem) mem))
473473
(Move [16] dst src mem) =>
474474
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem)
475-
(Move [32] dst src mem) =>
476-
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
475+
476+
(Move [s] dst src mem) && s > 16 && s <= 24 =>
477+
(MOVDstore [int32(s-8)] dst (MOVDload [int32(s-8)] src mem)
478+
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))
479+
(Move [s] dst src mem) && s > 24 && s <= 32 =>
480+
(STP [int32(s-16)] dst (Select0 <typ.UInt64> (LDP [int32(s-16)] src mem)) (Select1 <typ.UInt64> (LDP [int32(s-16)] src mem))
477481
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))
478-
(Move [48] dst src mem) =>
479-
(STP [32] dst (Select0 <typ.UInt64> (LDP [32] src mem)) (Select1 <typ.UInt64> (LDP [32] src mem))
482+
(Move [s] dst src mem) && s > 32 && s <= 40 =>
483+
(MOVDstore [int32(s-8)] dst (MOVDload [int32(s-8)] src mem)
484+
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
485+
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem)))
486+
(Move [s] dst src mem) && s > 40 && s <= 48 =>
487+
(STP [int32(s-16)] dst (Select0 <typ.UInt64> (LDP [int32(s-16)] src mem)) (Select1 <typ.UInt64> (LDP [int32(s-16)] src mem))
480488
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
481489
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem)))
482-
(Move [64] dst src mem) =>
483-
(STP [48] dst (Select0 <typ.UInt64> (LDP [48] src mem)) (Select1 <typ.UInt64> (LDP [48] src mem))
490+
(Move [s] dst src mem) && s > 48 && s <= 56 =>
491+
(MOVDstore [int32(s-8)] dst (MOVDload [int32(s-8)] src mem)
492+
(STP [32] dst (Select0 <typ.UInt64> (LDP [32] src mem)) (Select1 <typ.UInt64> (LDP [32] src mem))
493+
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
494+
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))
495+
(Move [s] dst src mem) && s > 56 && s <= 64 =>
496+
(STP [int32(s-16)] dst (Select0 <typ.UInt64> (LDP [int32(s-16)] src mem)) (Select1 <typ.UInt64> (LDP [int32(s-16)] src mem))
484497
(STP [32] dst (Select0 <typ.UInt64> (LDP [32] src mem)) (Select1 <typ.UInt64> (LDP [32] src mem))
485498
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
486499
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))
487500

488501
// strip off fractional word move
489-
(Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
502+
(Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 64 =>
490503
(Move [8]
491504
(OffPtr <dst.Type> dst [s-8])
492505
(OffPtr <src.Type> src [s-8])
493506
(Move [s-s%16] dst src mem))
494-
(Move [s] dst src mem) && s%16 != 0 && s%16 > 8 && s > 16 =>
507+
(Move [s] dst src mem) && s%16 != 0 && s%16 > 8 && s > 64 =>
495508
(Move [16]
496509
(OffPtr <dst.Type> dst [s-16])
497510
(OffPtr <src.Type> src [s-16])

src/cmd/compile/internal/ssa/rewrite.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1490,7 +1490,9 @@ func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
14901490
switch c.arch {
14911491
case "amd64":
14921492
return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1493-
case "386", "arm64":
1493+
case "arm64":
1494+
return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
1495+
case "386":
14941496
return sz <= 8
14951497
case "s390x", "ppc64", "ppc64le":
14961498
return sz <= 8 || disjoint(dst, sz, src, sz)

src/cmd/compile/internal/ssa/rewriteARM64.go

Lines changed: 136 additions & 19 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)