Skip to content

Commit aaaf9ce

Browse files
committed
[X86][AMX] Replace LDTILECFG with PLDTILECFGV on auto-config.
There is an intrinsic `@llvm.x86.ldtilecfg` which is lowered to LDTILECFG. This intrinsic lets users configure the tile registers themselves. There is a chance that `@llvm.x86.ldtilecfg` would be mixed with the new AMX intrinsics, which depend on the compiler to configure the tile registers. A separate pseudo instruction, PLDTILECFGV, avoids unexpected behaviour when `@llvm.x86.ldtilecfg` is mixed with the new AMX intrinsics. Though users should not mix the two programming models, the compiler should avoid crashes or UB when they are mixed. Differential Revision: https://reviews.llvm.org/D126519
1 parent cde101d commit aaaf9ce

13 files changed

+40
-39
lines changed

llvm/lib/Target/X86/X86FastPreTileConfig.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
515515
CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
516516
ST->getTileConfigAlignment(), false);
517517
LastTileCfg = addFrameReference(
518-
BuildMI(MBB, Before, DebugLoc(), TII->get(X86::LDTILECFG)), CfgSS);
518+
BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
519519
LastShapeMI = nullptr;
520520
Change = true;
521521
};

llvm/lib/Target/X86/X86FastTileConfig.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,15 @@ bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
110110
bool Change = false;
111111
SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
112112
for (MachineInstr &MI : reverse(MBB)) {
113-
if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::LDTILECFG)
113+
if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
114114
continue;
115115
// AMX instructions that define tile register.
116-
if (MI.getOpcode() != X86::LDTILECFG) {
116+
if (MI.getOpcode() != X86::PLDTILECFGV) {
117117
MachineOperand &Row = MI.getOperand(1);
118118
MachineOperand &Col = MI.getOperand(2);
119119
unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
120120
ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
121-
} else { // LDTILECFG
121+
} else { // PLDTILECFGV
122122
// Rewrite the shape information to memory. Stack slot should have
123123
// been initialized to zero in pre config.
124124
int SS = MI.getOperand(0).getIndex(); // tile config stack slot.

llvm/lib/Target/X86/X86InstrAMX.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
4848
VEX, T8XD;
4949

5050
// Pseudo instruction for RA.
51-
let isPseudo = true, mayLoad = 1 in
52-
def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src),
53-
[(int_x86_ldtilecfg_internal addr:$src)]>;
51+
let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
52+
Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
53+
def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
5454
let isPseudo = true, mayLoad = 1 in
5555
def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
5656
GR16:$src2,

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7360,7 +7360,7 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
73607360
// ENDBR instructions should not be scheduled around.
73617361
unsigned Opcode = MI.getOpcode();
73627362
if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
7363-
Opcode == X86::LDTILECFG)
7363+
Opcode == X86::PLDTILECFGV)
73647364
return true;
73657365

73667366
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);

llvm/lib/Target/X86/X86PreTileConfig.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
369369
// multi insert.
370370
if (VisitedOrInserted.insert(I).second) {
371371
auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
372-
addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)),
372+
addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)),
373373
SS);
374374
}
375375
}

llvm/lib/Target/X86/X86TileConfig.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,15 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
9090
int SS = INT_MAX;
9191
for (MachineBasicBlock &MBB : MF) {
9292
for (MachineInstr &MI : MBB) {
93-
if (MI.getOpcode() == X86::LDTILECFG) {
93+
if (MI.getOpcode() == X86::PLDTILECFGV) {
9494
SS = MI.getOperand(0).getIndex();
9595
break;
9696
}
9797
}
9898
if (SS != INT_MAX)
9999
break;
100100
}
101-
// Didn't find LDTILECFG, just return false;
101+
// Didn't find PLDTILECFGV, just return false;
102102
if (SS == INT_MAX)
103103
return false;
104104

llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ body: |
114114
; CHECK-NEXT: {{ $}}
115115
; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64
116116
; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16
117-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
117+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
118118
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
119119
; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]]
120120
; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -127,7 +127,7 @@ body: |
127127
; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32
128128
; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64
129129
; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16
130-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
130+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
131131
; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
132132
; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri3]], [[MOV16ri2]], [[COPY3]], 1, killed [[MOV32ri64_]], 0, $noreg
133133
; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -139,29 +139,29 @@ body: |
139139
; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri2]], %bb.2
140140
; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[MOV16ri3]], %bb.2
141141
; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r1]], %bb.2
142-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
142+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
143143
; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
144144
; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg
145145
; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg
146146
; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64
147147
; CHECK-NEXT: TILESTORED %stack.5, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.5)
148148
; CHECK-NEXT: [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64
149149
; CHECK-NEXT: [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16
150-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
150+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
151151
; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]]
152152
; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64
153153
; CHECK-NEXT: TILESTORED %stack.4, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILEZEROV1]] :: (store (s8192) into %stack.4)
154154
; CHECK-NEXT: [[MOV16ri6:%[0-9]+]]:gr16 = MOV16ri 64
155155
; CHECK-NEXT: [[MOV16ri7:%[0-9]+]]:gr16 = MOV16ri 16
156-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
156+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
157157
; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri7]], [[MOV16ri6]]
158158
; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64
159159
; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILEZEROV2]] :: (store (s8192) into %stack.3)
160160
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
161161
; CHECK-NEXT: JMP_1 %bb.5
162162
; CHECK-NEXT: {{ $}}
163163
; CHECK-NEXT: bb.4:
164-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
164+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
165165
; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
166166
; CHECK-NEXT: [[MOV16ri8:%[0-9]+]]:gr16 = MOV16ri 64
167167
; CHECK-NEXT: [[MOV16ri9:%[0-9]+]]:gr16 = MOV16ri 16
@@ -177,14 +177,14 @@ body: |
177177
; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[PHI]], %bb.3, %60, %bb.8
178178
; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr16 = PHI [[PHI1]], %bb.3, %59, %bb.8
179179
; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.3, %58, %bb.8
180-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
180+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
181181
; CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64
182182
; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], [[PHI6]], 1, killed [[MOV64ri7]], 0, $noreg
183183
; CHECK-NEXT: [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64
184184
; CHECK-NEXT: TILESTORED %stack.8, 1, killed [[MOV64ri8]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.8)
185185
; CHECK-NEXT: [[MOV16ri10:%[0-9]+]]:gr16 = MOV16ri 64
186186
; CHECK-NEXT: [[MOV16ri11:%[0-9]+]]:gr16 = MOV16ri 16
187-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
187+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
188188
; CHECK-NEXT: [[MOV64ri9:%[0-9]+]]:gr64_nosp = MOV64ri 64
189189
; CHECK-NEXT: [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri7]], [[MOV16ri6]], %stack.3, 1, killed [[MOV64ri9]], 0, $noreg :: (load (s8192) from %stack.3)
190190
; CHECK-NEXT: [[MOV64ri10:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -200,7 +200,7 @@ body: |
200200
; CHECK-NEXT: {{ $}}
201201
; CHECK-NEXT: [[MOV16ri12:%[0-9]+]]:gr16 = MOV16ri 64
202202
; CHECK-NEXT: [[MOV16ri13:%[0-9]+]]:gr16 = MOV16ri 16
203-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
203+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
204204
; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg
205205
; CHECK-NEXT: [[PTILEZEROV3:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri13]], [[MOV16ri12]]
206206
; CHECK-NEXT: [[MOV64ri12:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -213,7 +213,7 @@ body: |
213213
; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64_nosp = MOV32ri64 32
214214
; CHECK-NEXT: [[MOV16ri14:%[0-9]+]]:gr16 = MOV16ri 64
215215
; CHECK-NEXT: [[MOV16ri15:%[0-9]+]]:gr16 = MOV16ri 16
216-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
216+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
217217
; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.7, 1, $noreg, 0, $noreg
218218
; CHECK-NEXT: [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri15]], [[MOV16ri14]], [[COPY3]], 1, killed [[MOV32ri64_2]], 0, $noreg
219219
; CHECK-NEXT: [[MOV64ri13:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -225,7 +225,7 @@ body: |
225225
; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[MOV16ri12]], %bb.6, [[MOV16ri14]], %bb.7
226226
; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr16 = PHI [[MOV16ri13]], %bb.6, [[MOV16ri15]], %bb.7
227227
; CHECK-NEXT: [[PHI9:%[0-9]+]]:gr64_nosp = PHI [[LEA64r3]], %bb.6, [[LEA64r4]], %bb.7
228-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
228+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
229229
; CHECK-NEXT: [[MOV64ri14:%[0-9]+]]:gr64_nosp = MOV64ri 64
230230
; CHECK-NEXT: [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI8]], [[PHI7]], [[PHI9]], 1, killed [[MOV64ri14]], 0, $noreg
231231
; CHECK-NEXT: [[MOV64ri15:%[0-9]+]]:gr64_nosp = MOV64ri 64

llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ body: |
5151
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]]
5252
; CHECK-NEXT: %r0:gr16 = MOV16ri 64
5353
; CHECK-NEXT: %c0:gr16 = MOV16ri 16
54-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
54+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
5555
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
5656
; CHECK-NEXT: %t0:tile = PTILEZEROV %r0, %c0
5757
; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -66,7 +66,7 @@ body: |
6666
; CHECK-NEXT: {{ $}}
6767
; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64
6868
; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16
69-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
69+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
7070
; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
7171
; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]]
7272
; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -79,7 +79,7 @@ body: |
7979
; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %24, %bb.3
8080
; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %23, %bb.3
8181
; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %22, %bb.3
82-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
82+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
8383
; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
8484
; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg
8585
; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -92,7 +92,7 @@ body: |
9292
; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[PHI]], %bb.2
9393
; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[PHI1]], %bb.2
9494
; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.1, [[PHI2]], %bb.2
95-
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
95+
; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
9696
; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64
9797
; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri4]], 0, $noreg
9898
; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64

0 commit comments

Comments
 (0)