Skip to content

Commit 68c84b6

Browse files
committed
Implement LLVM x86 bmi intrinsics
1 parent 46c5332 commit 68c84b6

File tree

3 files changed

+330
-0
lines changed

3 files changed

+330
-0
lines changed

src/tools/miri/src/shims/x86/bmi.rs

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
use rustc_span::Symbol;
2+
use rustc_target::spec::abi::Abi;
3+
4+
use crate::*;
5+
6+
// Attach the BMI shim trait declared below to the Miri interpreter context.
// The impl body is empty because the trait's only method has a default body.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
7+
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
8+
fn emulate_x86_bmi_intrinsic(
9+
&mut self,
10+
link_name: Symbol,
11+
abi: Abi,
12+
args: &[OpTy<'tcx>],
13+
dest: &MPlaceTy<'tcx>,
14+
) -> InterpResult<'tcx, EmulateItemResult> {
15+
let this = self.eval_context_mut();
16+
17+
// Prefix should have already been checked.
18+
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap();
19+
20+
// The intrinsics are suffixed with the bit size of their operands.
21+
let (is_64_bit, unprefixed_name) = if unprefixed_name.ends_with("64") {
22+
(true, unprefixed_name.strip_suffix(".64").unwrap_or(""))
23+
} else {
24+
(false, unprefixed_name.strip_suffix(".32").unwrap_or(""))
25+
};
26+
27+
// All intrinsics of the "bmi" namespace belong to the "bmi2" ISA extension.
28+
// The exception is "bextr", which belongs to "bmi1".
29+
let target_feature = if unprefixed_name == "bextr" { "bmi1" } else { "bmi2" };
30+
this.expect_target_feature_for_intrinsic(link_name, target_feature)?;
31+
32+
if is_64_bit && this.tcx.sess.target.arch != "x86_64" {
33+
return Ok(EmulateItemResult::NotSupported);
34+
}
35+
36+
let [left, right] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
37+
let left = this.read_scalar(left)?;
38+
let right = this.read_scalar(right)?;
39+
40+
let left = if is_64_bit { left.to_u64()? } else { u64::from(left.to_u32()?) };
41+
let right = if is_64_bit { right.to_u64()? } else { u64::from(right.to_u32()?) };
42+
43+
let result = match unprefixed_name {
44+
// Extract a contigous range of bits from an unsigned integer.
45+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32
46+
"bextr" => {
47+
let start = u32::try_from(right & 0xff).unwrap();
48+
let len = u32::try_from((right >> 8) & 0xff).unwrap();
49+
let shifted = left.checked_shr(start).unwrap_or(0);
50+
// Keep the `len` lowest bits of `shifted`, or all bits if `len` is too big.
51+
if len >= 64 { shifted } else { shifted & 1u64.wrapping_shl(len).wrapping_sub(1) }
52+
}
53+
// Create a copy of an unsigned integer with bits above a certain index cleared.
54+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32
55+
"bzhi" => {
56+
let index = u32::try_from(right & 0xff).unwrap();
57+
// Keep the `index` lowest bits of `left`, or all bits if `index` is too big.
58+
if index >= 64 { left } else { left & 1u64.wrapping_shl(index).wrapping_sub(1) }
59+
}
60+
// Extract bit values of an unsigned integer at positions marked by a mask.
61+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32
62+
"pext" => {
63+
let mut mask = right;
64+
let mut i = 0u32;
65+
let mut result = 0;
66+
// Iterate over the mask one 1-bit at a time, from
67+
// the least significant bit to the most significant bit.
68+
while mask != 0 {
69+
// Extract the bit marked by the mask's least significant set bit
70+
// and put it at position `i` of the result.
71+
result |= u64::from(left & (1 << mask.trailing_zeros()) != 0) << i;
72+
i = i.wrapping_add(1);
73+
// Clear the least significant set bit.
74+
mask &= mask.wrapping_sub(1);
75+
}
76+
result
77+
}
78+
// Deposit bit values of an unsigned integer to positions marked by a mask.
79+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32
80+
"pdep" => {
81+
let mut mask = right;
82+
let mut set = left;
83+
let mut result = 0;
84+
// Iterate over the mask one 1-bit at a time, from
85+
// the least significant bit to the most significant bit.
86+
while mask != 0 {
87+
// Put rightmost bit of `set` at the position of the current `mask` bit.
88+
result |= (set & 1) << mask.trailing_zeros();
89+
// Go to next bit of `set`.
90+
set >>= 1;
91+
// Clear the least significant set bit.
92+
mask &= mask.wrapping_sub(1);
93+
}
94+
result
95+
}
96+
_ => return Ok(EmulateItemResult::NotSupported),
97+
};
98+
99+
let result = if is_64_bit {
100+
Scalar::from_u64(result)
101+
} else {
102+
Scalar::from_u32(u32::try_from(result).unwrap())
103+
};
104+
this.write_scalar(result, dest)?;
105+
106+
Ok(EmulateItemResult::NeedsReturn)
107+
}
108+
}

src/tools/miri/src/shims/x86/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use helpers::bool_to_simd_element;
1414
mod aesni;
1515
mod avx;
1616
mod avx2;
17+
mod bmi;
1718
mod sse;
1819
mod sse2;
1920
mod sse3;
@@ -113,6 +114,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
113114
pclmulqdq(this, left, right, imm, dest)?;
114115
}
115116

117+
name if name.starts_with("bmi.") => {
118+
return bmi::EvalContextExt::emulate_x86_bmi_intrinsic(
119+
this, link_name, abi, args, dest,
120+
);
121+
}
116122
name if name.starts_with("sse.") => {
117123
return sse::EvalContextExt::emulate_x86_sse_intrinsic(
118124
this, link_name, abi, args, dest,
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
// Ignore everything except x86 and x86_64
2+
// Any new targets that are added to CI should be ignored here.
3+
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
4+
//@ignore-target-aarch64
5+
//@ignore-target-arm
6+
//@ignore-target-avr
7+
//@ignore-target-s390x
8+
//@ignore-target-thumbv7em
9+
//@ignore-target-wasm32
10+
//@compile-flags: -C target-feature=+bmi1,+bmi2
11+
12+
#[cfg(target_arch = "x86")]
13+
use std::arch::x86::*;
14+
#[cfg(target_arch = "x86_64")]
15+
use std::arch::x86_64::*;
16+
17+
fn main() {
18+
// BMI1 and BMI2 are independent from each other, so both must be checked.
19+
assert!(is_x86_feature_detected!("bmi1"));
20+
assert!(is_x86_feature_detected!("bmi2"));
21+
22+
unsafe {
23+
test_bmi_32();
24+
test_bmi_64();
25+
}
26+
}
27+
28+
/// Exercises the 32-bit variants of the `bextr`, `pext`, `pdep` and `bzhi` intrinsics.
unsafe fn test_bmi_32() {
    unsafe fn check_bextr() {
        assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);

        for pos in 0..16 {
            assert_eq!(_bextr_u32(u32::MAX, pos, 4), 0b1111);
            assert_eq!(_bextr_u32(u32::MAX, 4, pos), (1 << pos) - 1);
        }

        // Lengths beyond the operand width must be covered as well: going above 32
        // verifies which bits of the length the instruction actually looks at.
        for len in 0..256 {
            // An out-of-range length leaves the input untouched, hence the `min(32)`.
            assert_eq!(_bextr_u32(u32::MAX, 0, len).count_ones(), len.min(32));
        }

        // A zero-length extraction yields zero regardless of the start position.
        for start in 0..256 {
            assert_eq!(_bextr_u32(u32::MAX, start, 0), 0);
        }

        // Completely random values. The control words have their upper bits set,
        // so these also check that those bits are ignored.
        let random_cases: [(u32, u32, u32); 3] = [
            (0x7408a392, 0x54ef705, 0x3a0451c),
            (0xbc5a3494, 0xdd193203, 0x178b4692),
            (0xc0332325, 0xf96e207, 0x1806646),
        ];
        for &(value, control, expected) in &random_cases {
            assert_eq!(_bextr2_u32(value, control), expected);
        }
    }

    unsafe fn check_pext() {
        let source = 0b1011_1110_1001_0011u32;

        // `(value, mask, expected)`: random values first, then various identities.
        let cases: [(u32, u32, u32); 6] = [
            (source, 0b0110_0011_1000_0101, 0b0000_0000_0011_0101),
            (source, 0b1110_1011_1110_1111, 0b0001_0111_0100_0011),
            (0x12345678, 0xff00fff0, 0x00012567),
            (u32::MAX, u32::MAX, u32::MAX),
            (u32::MAX, 0, 0),
            (0, u32::MAX, 0),
        ];
        for &(value, mask, expected) in &cases {
            assert_eq!(_pext_u32(value, mask), expected);
        }
    }

    unsafe fn check_pdep() {
        let source = 0b1011_1110_1001_0011u32;

        // `(value, mask, expected)`: random values first, then various identities.
        let cases: [(u32, u32, u32); 6] = [
            (source, 0b0110_0011_1000_0101, 0b0000_0010_0000_0101),
            (source, 0b1110_1011_1110_1111, 0b1110_1001_0010_0011),
            (0x00012567, 0xff00fff0, 0x12005670),
            (u32::MAX, u32::MAX, u32::MAX),
            (0, u32::MAX, 0),
            (u32::MAX, 0, 0),
        ];
        for &(value, mask, expected) in &cases {
            assert_eq!(_pdep_u32(value, mask), expected);
        }
    }

    unsafe fn check_bzhi() {
        assert_eq!(_bzhi_u32(0b1111_0010u32, 5), 0b0001_0010u32);

        // Indices beyond the operand width must be covered as well: going above 32
        // verifies which bits of the index the instruction actually looks at.
        for raw_index in 0..=512 {
            // Only the lowest eight bits form the index (`& 0xff`), and an
            // out-of-range index leaves the input untouched (`min(32)`).
            let index = (raw_index & 0xff).min(32);
            let expected = 1u32.checked_shl(index).unwrap_or(0).wrapping_sub(1);
            let actual = _bzhi_u32(u32::MAX, raw_index);
            assert_eq!(expected, actual);
        }
    }

    check_bextr();
    check_pext();
    check_pdep();
    check_bzhi();
}
120+
121+
/// The 64-bit intrinsics are only declared for x86_64, so there is nothing to run elsewhere.
#[cfg(not(target_arch = "x86_64"))]
unsafe fn test_bmi_64() {}

/// Exercises the 64-bit variants of the `bextr`, `pext`, `pdep` and `bzhi` intrinsics.
#[cfg(target_arch = "x86_64")]
unsafe fn test_bmi_64() {
    unsafe fn check_bextr() {
        assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);

        for pos in 0..16 {
            assert_eq!(_bextr_u64(u64::MAX, pos, 4), 0b1111);
            assert_eq!(_bextr_u64(u64::MAX, 32, pos), (1 << pos) - 1);
        }

        // Lengths beyond the operand width must be covered as well: going above 64
        // verifies which bits of the length the instruction actually looks at.
        for len in 0..256 {
            // An out-of-range length leaves the input untouched, hence the `min(64)`.
            assert_eq!(_bextr_u64(u64::MAX, 0, len).count_ones(), len.min(64));
        }

        // A zero-length extraction yields zero regardless of the start position.
        for start in 0..256 {
            assert_eq!(_bextr_u64(u64::MAX, start, 0), 0);
        }

        // Completely random values. The control words have their upper bits set,
        // so these also check that those bits are ignored.
        let random_cases: [(u64, u64, u64); 3] = [
            (0x4ff6cfbcea75f055, 0x216642e228425719, 0x27fb67de75),
            (0xb05e991e6f6e1b6, 0xc76dd5d7f67dfc14, 0xb05e991e6f),
            (0x5a3a629e323d848f, 0x95ac507d20e7719, 0x2d1d314f19),
        ];
        for &(value, control, expected) in &random_cases {
            assert_eq!(_bextr2_u64(value, control), expected);
        }
    }

    unsafe fn check_pext() {
        let source = 0b1011_1110_1001_0011u64;

        // `(value, mask, expected)`: random values first, then various identities.
        let cases: [(u64, u64, u64); 6] = [
            (source, 0b0110_0011_1000_0101, 0b0000_0000_0011_0101),
            (source, 0b1110_1011_1110_1111, 0b0001_0111_0100_0011),
            (0x12345678, 0xff00fff0, 0x00012567),
            (u64::MAX, u64::MAX, u64::MAX),
            (u64::MAX, 0, 0),
            (0, u64::MAX, 0),
        ];
        for &(value, mask, expected) in &cases {
            assert_eq!(_pext_u64(value, mask), expected);
        }
    }

    unsafe fn check_pdep() {
        let source = 0b1011_1110_1001_0011u64;

        // `(value, mask, expected)`: random values first, then various identities.
        let cases: [(u64, u64, u64); 6] = [
            (source, 0b0110_0011_1000_0101, 0b0000_0010_0000_0101),
            (source, 0b1110_1011_1110_1111, 0b1110_1001_0010_0011),
            (0x00012567, 0xff00fff0, 0x12005670),
            (u64::MAX, u64::MAX, u64::MAX),
            (0, u64::MAX, 0),
            (u64::MAX, 0, 0),
        ];
        for &(value, mask, expected) in &cases {
            assert_eq!(_pdep_u64(value, mask), expected);
        }
    }

    unsafe fn check_bzhi() {
        assert_eq!(_bzhi_u64(0b1111_0010u64, 5), 0b0001_0010u64);

        // Indices beyond the operand width must be covered as well. The sweep runs
        // past 255 so that the truncation of the index to eight bits is exercised too.
        for raw_index in 0..=512 {
            // Only the lowest eight bits form the index (`& 0xff`), and an
            // out-of-range index leaves the input untouched (`min(64)`).
            let index = (raw_index & 0xff).min(64);
            let expected = 1u64.checked_shl(index).unwrap_or(0).wrapping_sub(1);
            let actual = _bzhi_u64(u64::MAX, raw_index);
            assert_eq!(expected, actual);
        }
    }

    check_bextr();
    check_pext();
    check_pdep();
    check_bzhi();
}

0 commit comments

Comments
 (0)