Skip to content

Commit fd894f6

Browse files
authored
[AMDGPU] gfx1250 MC support for v_mov_b64 (#147859)
It is incomplete in terms of the DPP diagnostics; that is a much more involved change.
1 parent 617af3c commit fd894f6

File tree

5 files changed

+87
-5
lines changed

5 files changed

+87
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2305,6 +2305,10 @@ def isNotGFX1250Plus :
23052305
Predicate<"!Subtarget->hasGFX1250Insts()">,
23062306
AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;
23072307

2308+
def isGFX940orGFX1250 :
2309+
Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">,
2310+
AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>;
2311+
23082312
def HasIEEEMinimumMaximumInsts :
23092313
Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
23102314
AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,14 @@ multiclass VOP1Inst <string opName, VOPProfile P,
140140
if P.HasExtDPP then
141141
def _dpp : VOP1_DPP_Pseudo <opName, P>;
142142

143-
let SubtargetPredicate = isGFX11Plus in {
144-
if P.HasExtVOP3DPP then
145-
def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
146-
} // End SubtargetPredicate = isGFX11Plus
143+
if P.HasExtVOP3DPP then
144+
def _e64_dpp : VOP3_DPP_Pseudo <opName, P> {
145+
let SubtargetPredicate = isGFX11Plus;
146+
}
147+
else if P.HasExt64BitDPP then
148+
def _e64_dpp : VOP3_DPP_Pseudo <opName, P> {
149+
let OtherPredicates = [HasDPALU_DPP];
150+
}
147151

148152
def : LetDummies, AMDGPUMnemonicAlias<opName#"_e32", opName>;
149153
def : LetDummies, AMDGPUMnemonicAlias<opName#"_e64", opName>;
@@ -236,7 +240,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
236240
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
237241
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
238242

239-
let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
243+
let SubtargetPredicate = isGFX940orGFX1250, SchedRW = [Write64Bit] in
240244
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
241245
} // End isMoveImm = 1
242246

@@ -1117,6 +1121,8 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>;
11171121
defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
11181122
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
11191123

1124+
defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
1125+
11201126
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
11211127
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
11221128
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;

llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,30 @@
11
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
22
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
33

4+
v_mov_b64_e32 v[4:5], v[2:3]
5+
// GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e]
6+
7+
v_mov_b64 v[4:5], v[254:255]
8+
// GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e]
9+
10+
v_mov_b64 v[4:5], s[2:3]
11+
// GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e]
12+
13+
v_mov_b64 v[4:5], vcc
14+
// GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e]
15+
16+
v_mov_b64 v[4:5], exec
17+
// GFX1250: v_mov_b64_e32 v[4:5], exec ; encoding: [0x7e,0x3a,0x08,0x7e]
18+
19+
v_mov_b64 v[4:5], null
20+
// GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e]
21+
22+
v_mov_b64 v[4:5], -1
23+
// GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e]
24+
25+
v_mov_b64 v[4:5], 0.5
26+
// GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e]
27+
428
v_cvt_f32_bf16 v5, v1
529
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
630

llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,30 @@
11
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
22
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
33

4+
v_mov_b64_e32 v[4:5], v[2:3]
5+
// GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e]
6+
7+
v_mov_b64 v[4:5], v[254:255]
8+
// GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e]
9+
10+
v_mov_b64 v[4:5], s[2:3]
11+
// GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e]
12+
13+
v_mov_b64 v[4:5], vcc
14+
// GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e]
15+
16+
v_mov_b64 v[4:5], exec
17+
// GFX1250: v_mov_b64_e32 v[4:5], exec ; encoding: [0x7e,0x3a,0x08,0x7e]
18+
19+
v_mov_b64 v[4:5], null
20+
// GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e]
21+
22+
v_mov_b64 v[4:5], -1
23+
// GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e]
24+
25+
v_mov_b64 v[4:5], 0.5
26+
// GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e]
27+
428
v_cvt_f32_bf16 v5, v1
529
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
630

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,30 @@
22
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
33
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
44

5+
0xc1,0x3a,0x08,0x7e
6+
# GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e]
7+
8+
0xf0,0x3a,0x08,0x7e
9+
# GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e]
10+
11+
0x7e,0x3a,0x08,0x7e
12+
# GFX1250: v_mov_b64_e32 v[4:5], exec ; encoding: [0x7e,0x3a,0x08,0x7e]
13+
14+
0x7c,0x3a,0x08,0x7e
15+
# GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e]
16+
17+
0x02,0x3a,0x08,0x7e
18+
# GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e]
19+
20+
0xfe,0x3b,0x08,0x7e
21+
# GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e]
22+
23+
0x02,0x3b,0x08,0x7e
24+
# GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e]
25+
26+
0x6a,0x3a,0x08,0x7e
27+
# GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e]
28+
529
0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
630
# GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
731

0 commit comments

Comments
 (0)