Skip to content

Commit fafef7d

Browse files
committed
Add x86_palignr instructions
This instruction is necessary for implementing `[s|u]widen_high`.
1 parent 0e5e8a6 commit fafef7d

File tree

5 files changed

+36
-4
lines changed

5 files changed

+36
-4
lines changed

cranelift/codegen/meta/src/isa/x86/encodings.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,6 +1697,7 @@ fn define_simd(
16971697
let x86_pminu = x86.by_name("x86_pminu");
16981698
let x86_pmullq = x86.by_name("x86_pmullq");
16991699
let x86_pmuludq = x86.by_name("x86_pmuludq");
1700+
let x86_palignr = x86.by_name("x86_palignr");
17001701
let x86_pshufb = x86.by_name("x86_pshufb");
17011702
let x86_pshufd = x86.by_name("x86_pshufd");
17021703
let x86_psll = x86.by_name("x86_psll");
@@ -1901,6 +1902,8 @@ fn define_simd(
19011902
rec_fa.opcodes(low),
19021903
);
19031904
}
1905+
1906+
// SIMD narrow/widen
19041907
for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
19051908
let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
19061909
e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
@@ -1912,6 +1915,13 @@ fn define_simd(
19121915
let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
19131916
e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
19141917
}
1918+
for ty in &[I8, I16, I32, I64] {
1919+
e.enc_both_inferred_maybe_isap(
1920+
x86_palignr.bind(vector(*ty, sse_vector_size)),
1921+
rec_fa_ib.opcodes(&PALIGNR[..]),
1922+
Some(use_ssse3_simd),
1923+
);
1924+
}
19151925

19161926
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
19171927
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {

cranelift/codegen/meta/src/isa/x86/instructions.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,21 @@ pub(crate) fn define(
664664
.operands_out(vec![a]),
665665
);
666666

667+
let c = &Operand::new("c", uimm8)
668+
.with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
669+
ig.push(
670+
Inst::new(
671+
"x86_palignr",
672+
r#"
673+
Concatenate destination and source operands, extracting a byte-aligned result shifted to
674+
the right by `c`.
675+
"#,
676+
&formats.ternary_imm8,
677+
)
678+
.operands_in(vec![x, y, c])
679+
.operands_out(vec![a]),
680+
);
681+
667682
let i64_t = &TypeVar::new(
668683
"i64_t",
669684
"A scalar 64bit integer",

cranelift/codegen/meta/src/isa/x86/opcodes.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,10 @@ pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
354354
/// Add packed unsigned word integers from xmm2/m128 and xmm1, and saturate the results (SSE).
355355
pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
356356

357+
/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
358+
/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
359+
pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
360+
357361
/// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
358362
pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
359363

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2133,6 +2133,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
21332133
| Opcode::X86Insertps
21342134
| Opcode::X86Movsd
21352135
| Opcode::X86Movlhps
2136+
| Opcode::X86Palignr
21362137
| Opcode::X86Psll
21372138
| Opcode::X86Psrl
21382139
| Opcode::X86Psra
Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
test binemit
2-
target x86_64
2+
set enable_simd
3+
target x86_64 has_ssse3=true
34

45
; Ensure raw_bitcast emits no instructions.
56
function %raw_bitcast_i16x8_to_b32x4() {
@@ -10,8 +11,9 @@ block0:
1011
return
1112
}
1213

13-
function %fcvt_32(i32x4) {
14-
block0(v0: i32x4 [%xmm6]):
15-
[-, %xmm2] v1 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6
14+
function %conversions_i32x4(i32x4, i32x4) {
15+
block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]):
16+
[-, %xmm2] v2 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6
17+
[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03
1618
return
1719
}

0 commit comments

Comments
 (0)