Skip to content

Commit 0ea63af

Browse files
committed
Initial refactor of float to int casts
1 parent 706905b commit 0ea63af

File tree

1 file changed

+77
-29
lines changed

1 file changed

+77
-29
lines changed

src/builder.rs

Lines changed: 77 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1777,6 +1777,13 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
17771777
dest_ty: Type<'gcc>,
17781778
) -> RValue<'gcc> {
17791779
let src_ty = self.cx.val_ty(val);
1780+
// This function uses val quite heavily, which can lead to duplication of rvalues.
1781+
// Assign val to a local to prevent this.
1782+
/*
1783+
let val_local = func.new_local(self.location, src_ty, "val_local");
1784+
self.block.add_assignment(self.location, val_local, val);*/
1785+
let func = self.current_func.borrow().expect("func");
1786+
let val = val.to_rvalue();
17801787
let (float_ty, int_ty) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
17811788
assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
17821789
(self.cx.element_type(src_ty), self.cx.element_type(dest_ty))
@@ -1899,40 +1906,81 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
18991906
let zero = maybe_splat(self, zero);
19001907

19011908
// Step 1 ...
1902-
let fptosui_result =
1903-
if signed { self.fptosi(val, dest_ty) } else { self.fptoui(val, dest_ty) };
1909+
19041910
let less_or_nan = self.fcmp(RealPredicate::RealULT, val, f_min);
19051911
let greater = self.fcmp(RealPredicate::RealOGT, val, f_max);
19061912

1907-
// Step 2: We use two comparisons and two selects, with %s1 being the
1908-
// result:
1909-
// %less_or_nan = fcmp ult %val, %f_min
1910-
// %greater = fcmp ogt %val, %f_max
1911-
// %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
1912-
// %s1 = select %greater, int_ty::MAX, %s0
1913-
// Note that %less_or_nan uses an *unordered* comparison. This
1914-
// comparison is true if the operands are not comparable (i.e., if val is
1915-
// NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
1916-
// val is NaN.
1917-
//
1918-
// Performance note: Unordered comparison can be lowered to a "flipped"
1919-
// comparison and a negation, and the negation can be merged into the
1920-
// select. Therefore, it is not necessarily any more expensive than an
1921-
// ordered ("normal") comparison. Whether these optimizations will be
1922-
// performed is ultimately up to the backend, but at least x86 does
1923-
// perform them.
1924-
let s0 = self.select(less_or_nan, int_min, fptosui_result);
1925-
let s1 = self.select(greater, int_max, s0);
1926-
1927-
// Step 3: NaN replacement.
1928-
// For unsigned types, the above step already yielded int_ty::MIN == 0 if val is NaN.
1929-
// Therefore we only need to execute this step for signed integer types.
1913+
let cast_res = func.new_local(self.location, dest_ty, "fti_cast_res");
19301914
if signed {
1931-
// LLVM has no isNaN predicate, so we use (val == val) instead
1932-
let cmp = self.fcmp(RealPredicate::RealOEQ, val, val);
1933-
self.select(cmp, s1, zero)
1915+
// Create blocks
1916+
let nan = func.new_block("nan");
1917+
let not_nan = func.new_block("not_nan");
1918+
let gt_min = func.new_block("gt_min");
1919+
let in_bounds = func.new_block("in_bounds");
1920+
let gt_max = func.new_block("gt_max");
1921+
let lt_min = func.new_block("lt_min");
1922+
let after_block = func.new_block("after_cast");
1923+
// First, we check if the value is NaN. If it is, we jump away to the `nan` block.
1924+
// If it is not, we continue on to the `not_nan` block.
1925+
let is_nan = self.fcmp(RealPredicate::RealOEQ, val, val);
1926+
self.block.end_with_conditional(self.location, is_nan, nan, not_nan);
1927+
// If the value is NaN, assign 0 to cast_res, and jump to `after`.
1928+
self.switch_to_block(nan);
1929+
self.block.add_assignment(self.location, cast_res, zero);
1930+
self.block.end_with_jump(self.location, after_block);
1931+
// The value is not NaN. Check if it is lower than the min end of our range.
1932+
self.switch_to_block(not_nan);
1933+
self.block.end_with_conditional(self.location, less_or_nan, lt_min, gt_min);
1934+
// Value less than min - assign min to cast_res, jump to `after`.
1935+
self.switch_to_block(lt_min);
1936+
self.block.add_assignment(self.location, cast_res, int_min);
1937+
self.block.end_with_jump(self.location, after_block);
1938+
// Value not less than min - check if it fits within the upper end of our range.
1939+
self.switch_to_block(gt_min);
1940+
self.block.end_with_conditional(self.location, greater, gt_max, in_bounds);
1941+
// Value is greater than MAX - assign MAX to cast_res, jump to after.
1942+
self.switch_to_block(gt_max);
1943+
self.block.add_assignment(self.location, cast_res, int_max);
1944+
self.block.end_with_jump(self.location, after_block);
1945+
// Value in range - we can safely cast.
1946+
self.switch_to_block(in_bounds);
1947+
let fptosi_result = self.fptosi(val, dest_ty);
1948+
self.block.add_assignment(self.location, cast_res, fptosi_result);
1949+
self.block.end_with_jump(self.location, after_block);
1950+
// The final block - read `cast_res`, continue on our merry way :).
1951+
self.switch_to_block(after_block);
1952+
return cast_res.to_rvalue();
19341953
} else {
1935-
s1
1954+
// Create blocks
1955+
let lt_max = func.new_block("lt_max");
1956+
let in_bounds = func.new_block("in_bounds");
1957+
let gt_max = func.new_block("gt_max");
1958+
let lt_min = func.new_block("lt_min");
1959+
let after_block = func.new_block("after_cast");
1960+
// We first start by checking if the value is greater than max. This order (opposite of signed)
1961+
// will allow us to save a NaN check later down the line.
1962+
let greater = self.fcmp(RealPredicate::RealOGT, val, f_max);
1963+
self.block.end_with_conditional(self.location, greater, gt_max, lt_max);
1964+
// Value greater than max - just assign max, jump to after.
1965+
self.switch_to_block(gt_max);
1966+
self.block.add_assignment(self.location, cast_res, int_max);
1967+
self.block.end_with_jump(self.location, after_block);
1968+
// Value not greater than max - either in range, less than min, or NaN.
1969+
self.switch_to_block(lt_max);
1970+
let greater_than_zero = self.fcmp(RealPredicate::RealOGT, val, f_min);
1971+
self.block.end_with_conditional(self.location, greater_than_zero, in_bounds, lt_min);
1972+
// Value less than min - assign min to cast_res, jump to `after`.
1973+
self.switch_to_block(lt_min);
1974+
self.block.add_assignment(self.location, cast_res, int_min);
1975+
self.block.end_with_jump(self.location, after_block);
1976+
// Value in range - we can safely cast.
1977+
self.switch_to_block(in_bounds);
1978+
let fptosi_result = self.fptosi(val, dest_ty);
1979+
self.block.add_assignment(self.location, cast_res, fptosi_result);
1980+
self.block.end_with_jump(self.location, after_block);
1981+
// The final block - read `cast_res`, continue on our merry way :).
1982+
self.switch_to_block(after_block);
1983+
return cast_res.to_rvalue();
19361984
}
19371985
}
19381986

0 commit comments

Comments
 (0)