Skip to content

Commit b6515ae

Browse files
authored
[AArch64] Align 0-cycle reg-mov model of GPR64, GPR32 reg classes (#146051)
Aligns the 0-cycle register MOV model of the GPR64 and GPR32 register classes with that of FPR64 and FPR32, resolved in #144152. - Splits `FeatureZCRegMove` into `FeatureZCRegMoveGPR64` and `FeatureZCRegMoveGPR32` and updates the Apple processors and `AArch64InstrInfo` accordingly. - Aligns the test `arm64-zero-cycle-regmov-gpr.ll` with the FPR one. The target feature name change is effectively a breaking change. The vast majority of users shouldn't use `-mzcm` directly, so I think it should be OK to make an immediate switch, unless this doesn't align with the conventions in this project. The patch adds a release note for that.
1 parent 69b8e59 commit b6515ae

File tree

5 files changed

+73
-59
lines changed

5 files changed

+73
-59
lines changed

llvm/lib/Target/AArch64/AArch64Features.td

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -612,8 +612,11 @@ def FeatureExperimentalZeroingPseudos
612612
def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
613613
"NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;
614614

615-
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
616-
"Has zero-cycle register moves">;
615+
def FeatureZCRegMoveGPR64 : SubtargetFeature<"zcm-gpr64", "HasZeroCycleRegMoveGPR64", "true",
616+
"Has zero-cycle register moves for GPR64 registers">;
617+
618+
def FeatureZCRegMoveGPR32 : SubtargetFeature<"zcm-gpr32", "HasZeroCycleRegMoveGPR32", "true",
619+
"Has zero-cycle register moves for GPR32 registers">;
617620

618621
def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFPR64", "true",
619622
"Has zero-cycle register moves for FPR64 registers">;

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5037,7 +5037,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50375037

50385038
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
50395039
// If either operand is WSP, expand to ADD #0.
5040-
if (Subtarget.hasZeroCycleRegMove()) {
5040+
if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5041+
!Subtarget.hasZeroCycleRegMoveGPR32()) {
50415042
// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
50425043
MCRegister DestRegX = TRI->getMatchingSuperReg(
50435044
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
@@ -5063,7 +5064,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50635064
.addImm(0)
50645065
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
50655066
} else {
5066-
if (Subtarget.hasZeroCycleRegMove()) {
5067+
if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5068+
!Subtarget.hasZeroCycleRegMoveGPR32()) {
50675069
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
50685070
MCRegister DestRegX = TRI->getMatchingSuperReg(
50695071
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
311311
FeatureDisableLatencySchedHeuristic,
312312
FeatureFuseAES, FeatureFuseCryptoEOR,
313313
FeatureStorePairSuppress,
314-
FeatureZCRegMove,
314+
FeatureZCRegMoveGPR64,
315315
FeatureZCRegMoveFPR64,
316316
FeatureZCZeroing,
317317
FeatureZCZeroingFPWorkaround]>;
@@ -325,7 +325,7 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
325325
FeatureFuseAES,
326326
FeatureFuseCryptoEOR,
327327
FeatureStorePairSuppress,
328-
FeatureZCRegMove,
328+
FeatureZCRegMoveGPR64,
329329
FeatureZCRegMoveFPR64,
330330
FeatureZCZeroing]>;
331331

@@ -338,7 +338,7 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
338338
FeatureFuseAES,
339339
FeatureFuseCryptoEOR,
340340
FeatureStorePairSuppress,
341-
FeatureZCRegMove,
341+
FeatureZCRegMoveGPR64,
342342
FeatureZCRegMoveFPR64,
343343
FeatureZCZeroing]>;
344344

@@ -351,7 +351,7 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
351351
FeatureFuseAES,
352352
FeatureFuseCryptoEOR,
353353
FeatureStorePairSuppress,
354-
FeatureZCRegMove,
354+
FeatureZCRegMoveGPR64,
355355
FeatureZCRegMoveFPR64,
356356
FeatureZCZeroing]>;
357357

@@ -364,7 +364,7 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
364364
FeatureFuseAES,
365365
FeatureFuseCryptoEOR,
366366
FeatureStorePairSuppress,
367-
FeatureZCRegMove,
367+
FeatureZCRegMoveGPR64,
368368
FeatureZCRegMoveFPR64,
369369
FeatureZCZeroing]>;
370370

@@ -382,7 +382,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
382382
FeatureFuseCryptoEOR,
383383
FeatureFuseLiterals,
384384
FeatureStorePairSuppress,
385-
FeatureZCRegMove,
385+
FeatureZCRegMoveGPR64,
386386
FeatureZCRegMoveFPR64,
387387
FeatureZCZeroing]>;
388388

@@ -400,7 +400,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
400400
FeatureFuseCryptoEOR,
401401
FeatureFuseLiterals,
402402
FeatureStorePairSuppress,
403-
FeatureZCRegMove,
403+
FeatureZCRegMoveGPR64,
404404
FeatureZCRegMoveFPR64,
405405
FeatureZCZeroing]>;
406406

@@ -418,7 +418,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
418418
FeatureFuseCryptoEOR,
419419
FeatureFuseLiterals,
420420
FeatureStorePairSuppress,
421-
FeatureZCRegMove,
421+
FeatureZCRegMoveGPR64,
422422
FeatureZCRegMoveFPR64,
423423
FeatureZCZeroing]>;
424424

@@ -436,7 +436,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
436436
FeatureFuseCryptoEOR,
437437
FeatureFuseLiterals,
438438
FeatureStorePairSuppress,
439-
FeatureZCRegMove,
439+
FeatureZCRegMoveGPR64,
440440
FeatureZCRegMoveFPR64,
441441
FeatureZCZeroing]>;
442442

@@ -453,7 +453,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
453453
FeatureFuseCCSelect,
454454
FeatureFuseCryptoEOR,
455455
FeatureFuseLiterals,
456-
FeatureZCRegMove,
456+
FeatureZCRegMoveGPR64,
457457
FeatureZCRegMoveFPR64,
458458
FeatureZCZeroing
459459
]>;
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOTCPU-LINUX --match-full-lines
2+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOTCPU-APPLE --match-full-lines
3+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=CPU --match-full-lines
4+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-gpr64 | FileCheck %s -check-prefixes=NOTATTR --match-full-lines
5+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-gpr64 | FileCheck %s -check-prefixes=ATTR --match-full-lines
6+
7+
define void @zero_cycle_regmov_GPR32(i32 %a, i32 %b, i32 %c, i32 %d) {
8+
entry:
9+
; CHECK-LABEL: t:
10+
; NOTCPU-LINUX: mov w0, w2
11+
; NOTCPU-LINUX: mov w1, w3
12+
; NOTCPU-LINUX: mov [[REG2:w[0-9]+]], w3
13+
; NOTCPU-LINUX: mov [[REG1:w[0-9]+]], w2
14+
; NOTCPU-LINUX-NEXT: bl {{_?foo_i32}}
15+
; NOTCPU-LINUX: mov w0, [[REG1]]
16+
; NOTCPU-LINUX: mov w1, [[REG2]]
17+
18+
; NOTCPU-APPLE: mov w0, w2
19+
; NOTCPU-APPLE: mov w1, w3
20+
; NOTCPU-APPLE: mov [[REG2:w[0-9]+]], w3
21+
; NOTCPU-APPLE: mov [[REG1:w[0-9]+]], w2
22+
; NOTCPU-APPLE-NEXT: bl {{_?foo_i32}}
23+
; NOTCPU-APPLE: mov w0, [[REG1]]
24+
; NOTCPU-APPLE: mov w1, [[REG2]]
25+
26+
; CPU: mov [[REG2:x[0-9]+]], x3
27+
; CPU: mov [[REG1:x[0-9]+]], x2
28+
; CPU: mov x0, x2
29+
; CPU: mov x1, x3
30+
; CPU-NEXT: bl {{_?foo_i32}}
31+
; CPU: mov x0, [[REG1]]
32+
; CPU: mov x1, [[REG2]]
33+
34+
; NOTATTR: mov [[REG2:w[0-9]+]], w3
35+
; NOTATTR: mov [[REG1:w[0-9]+]], w2
36+
; NOTATTR: mov w0, w2
37+
; NOTATTR: mov w1, w3
38+
; NOTATTR-NEXT: bl {{_?foo_i32}}
39+
; NOTATTR: mov w0, [[REG1]]
40+
; NOTATTR: mov w1, [[REG2]]
41+
42+
; ATTR: mov x0, x2
43+
; ATTR: mov x1, x3
44+
; ATTR: mov [[REG2:x[0-9]+]], x3
45+
; ATTR: mov [[REG1:x[0-9]+]], x2
46+
; ATTR-NEXT: bl {{_?foo_i32}}
47+
; ATTR: mov x0, [[REG1]]
48+
; ATTR: mov x1, [[REG2]]
49+
%call = call i32 @foo_i32(i32 %c, i32 %d)
50+
%call1 = call i32 @foo_i32(i32 %c, i32 %d)
51+
unreachable
52+
}
53+
54+
declare i32 @foo_i32(i32, i32)

llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll

Lines changed: 0 additions & 45 deletions
This file was deleted.

0 commit comments

Comments
 (0)