Skip to content

Commit c8ddf8a

Browse files
committed
Encode [u|s]widen_low for x86
1 parent fafef7d commit c8ddf8a

File tree

5 files changed

+103
-10
lines changed

5 files changed

+103
-10
lines changed

cranelift/codegen/meta/src/isa/x86/encodings.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,6 +1669,7 @@ fn define_simd(
16691669
let ssub_sat = shared.by_name("ssub_sat");
16701670
let store = shared.by_name("store");
16711671
let store_complex = shared.by_name("store_complex");
1672+
let swiden_low = shared.by_name("swiden_low");
16721673
let uadd_sat = shared.by_name("uadd_sat");
16731674
let uload8x8 = shared.by_name("uload8x8");
16741675
let uload8x8_complex = shared.by_name("uload8x8_complex");
@@ -1678,6 +1679,7 @@ fn define_simd(
16781679
let uload32x2_complex = shared.by_name("uload32x2_complex");
16791680
let snarrow = shared.by_name("snarrow");
16801681
let unarrow = shared.by_name("unarrow");
1682+
let uwiden_low = shared.by_name("uwiden_low");
16811683
let ushr_imm = shared.by_name("ushr_imm");
16821684
let usub_sat = shared.by_name("usub_sat");
16831685
let vconst = shared.by_name("vconst");
@@ -1915,6 +1917,16 @@ fn define_simd(
19151917
let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
19161918
e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
19171919
}
1920+
for (ty, swiden_opcode, uwiden_opcode) in &[
1921+
(I8, &PMOVSXBW[..], &PMOVZXBW[..]),
1922+
(I16, &PMOVSXWD[..], &PMOVZXWD[..]),
1923+
] {
1924+
let isap = Some(use_sse41_simd);
1925+
let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
1926+
e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
1927+
let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
1928+
e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
1929+
}
19181930
for ty in &[I8, I16, I32, I64] {
19191931
e.enc_both_inferred_maybe_isap(
19201932
x86_palignr.bind(vector(*ty, sse_vector_size)),

cranelift/codegen/meta/src/isa/x86/opcodes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
477477
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
478478

479479
/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
480-
/// integers in xmm1.
480+
/// integers in xmm1 (SSE4.1).
481481
pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
482482

483483
/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
@@ -489,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
489489
pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
490490

491491
/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
492-
/// integers in xmm1.
492+
/// integers in xmm1 (SSE4.1).
493493
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
494494

495495
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 75 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3883,19 +3883,19 @@ pub(crate) fn define(
38833883
.constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
38843884
);
38853885

3886-
let I16xN = &TypeVar::new(
3887-
"I16xN",
3888-
"A SIMD vector type containing integers 16-bits wide and up",
3886+
let I16or32xN = &TypeVar::new(
3887+
"I16or32xN",
3888+
"A SIMD vector type containing integer lanes 16 or 32 bits wide",
38893889
TypeSetBuilder::new()
38903890
.ints(16..32)
38913891
.simd_lanes(4..8)
38923892
.includes_scalars(false)
38933893
.build(),
38943894
);
38953895

3896-
let x = &Operand::new("x", I16xN);
3897-
let y = &Operand::new("y", I16xN);
3898-
let a = &Operand::new("a", &I16xN.split_lanes());
3896+
let x = &Operand::new("x", I16or32xN);
3897+
let y = &Operand::new("y", I16or32xN);
3898+
let a = &Operand::new("a", &I16or32xN.split_lanes());
38993899

39003900
ig.push(
39013901
Inst::new(
@@ -3934,6 +3934,75 @@ pub(crate) fn define(
39343934
.operands_out(vec![a]),
39353935
);
39363936

3937+
let I8or16xN = &TypeVar::new(
3938+
"I8or16xN",
3939+
"A SIMD vector type containing integer lanes 8 or 16 bits wide.",
3940+
TypeSetBuilder::new()
3941+
.ints(8..16)
3942+
.simd_lanes(8..16)
3943+
.includes_scalars(false)
3944+
.build(),
3945+
);
3946+
3947+
let x = &Operand::new("x", I8or16xN);
3948+
let a = &Operand::new("a", &I8or16xN.merge_lanes());
3949+
3950+
ig.push(
3951+
Inst::new(
3952+
"swiden_low",
3953+
r#"
3954+
Widen the low lanes of `x` using signed extension.
3955+
3956+
This will double the lane width and halve the number of lanes.
3957+
"#,
3958+
&formats.unary,
3959+
)
3960+
.operands_in(vec![x])
3961+
.operands_out(vec![a]),
3962+
);
3963+
3964+
ig.push(
3965+
Inst::new(
3966+
"swiden_high",
3967+
r#"
3968+
Widen the high lanes of `x` using signed extension.
3969+
3970+
This will double the lane width and halve the number of lanes.
3971+
"#,
3972+
&formats.unary,
3973+
)
3974+
.operands_in(vec![x])
3975+
.operands_out(vec![a]),
3976+
);
3977+
3978+
ig.push(
3979+
Inst::new(
3980+
"uwiden_low",
3981+
r#"
3982+
Widen the low lanes of `x` using unsigned extension.
3983+
3984+
This will double the lane width and halve the number of lanes.
3985+
"#,
3986+
&formats.unary,
3987+
)
3988+
.operands_in(vec![x])
3989+
.operands_out(vec![a]),
3990+
);
3991+
3992+
ig.push(
3993+
Inst::new(
3994+
"uwiden_high",
3995+
r#"
3996+
Widen the high lanes of `x` using unsigned extension.
3997+
3998+
This will double the lane width and halve the number of lanes.
3999+
"#,
4000+
&formats.unary,
4001+
)
4002+
.operands_in(vec![x])
4003+
.operands_out(vec![a]),
4004+
);
4005+
39374006
let IntTo = &TypeVar::new(
39384007
"IntTo",
39394008
"A larger integer type with the same number of lanes",

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2154,7 +2154,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
21542154

21552155
Opcode::AvgRound => unimplemented!(),
21562156
Opcode::Iabs => unimplemented!(),
2157-
Opcode::Snarrow | Opcode::Unarrow => unimplemented!(),
2157+
Opcode::Snarrow
2158+
| Opcode::Unarrow
2159+
| Opcode::SwidenLow
2160+
| Opcode::SwidenHigh
2161+
| Opcode::UwidenLow
2162+
| Opcode::UwidenHigh => unimplemented!(),
21582163
Opcode::TlsValue => unimplemented!(),
21592164
}
21602165

cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
test binemit
22
set enable_simd
3-
target x86_64 has_ssse3=true
3+
target x86_64 nehalem
44

55
; Ensure raw_bitcast emits no instructions.
66
function %raw_bitcast_i16x8_to_b32x4() {
@@ -17,3 +17,10 @@ block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]):
1717
[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03
1818
return
1919
}
20+
21+
function %conversions_i16x8(i16x8) {
22+
block0(v0: i16x8 [%xmm6]):
23+
[-, %xmm2] v1 = swiden_low v0 ; bin: 66 0f 38 23 d6
24+
[-, %xmm11] v2 = uwiden_low v0 ; bin: 66 44 0f 38 33 de
25+
return
26+
}

0 commit comments

Comments
 (0)