@@ -1777,6 +1777,12 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
1777
1777
dest_ty : Type < ' gcc > ,
1778
1778
) -> RValue < ' gcc > {
1779
1779
let src_ty = self . cx . val_ty ( val) ;
1780
+ // This function uses val quite heavily, which can lead to duplication of rvalues.
1781
+ // Assign val to a local to prevent this.
1782
+ /*
1783
+ let val_local = func.new_local(self.location, src_ty, "val_local");
1784
+ self.block.add_assignment(self.location, val_local, val);*/
1785
+
1780
1786
let ( float_ty, int_ty) = if self . cx . type_kind ( src_ty) == TypeKind :: Vector {
1781
1787
assert_eq ! ( self . cx. vector_length( src_ty) , self . cx. vector_length( dest_ty) ) ;
1782
1788
( self . cx . element_type ( src_ty) , self . cx . element_type ( dest_ty) )
@@ -1899,40 +1905,81 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
1899
1905
let zero = maybe_splat ( self , zero) ;
1900
1906
1901
1907
// Step 1 ...
1902
- let fptosui_result =
1903
- if signed { self . fptosi ( val, dest_ty) } else { self . fptoui ( val, dest_ty) } ;
1908
+ let func = self . current_func . borrow ( ) . expect ( "func" ) ;
1904
1909
let less_or_nan = self . fcmp ( RealPredicate :: RealULT , val, f_min) ;
1905
1910
let greater = self . fcmp ( RealPredicate :: RealOGT , val, f_max) ;
1906
1911
1907
- // Step 2: We use two comparisons and two selects, with %s1 being the
1908
- // result:
1909
- // %less_or_nan = fcmp ult %val, %f_min
1910
- // %greater = fcmp olt %val, %f_max
1911
- // %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
1912
- // %s1 = select %greater, int_ty::MAX, %s0
1913
- // Note that %less_or_nan uses an *unordered* comparison. This
1914
- // comparison is true if the operands are not comparable (i.e., if val is
1915
- // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
1916
- // val is NaN.
1917
- //
1918
- // Performance note: Unordered comparison can be lowered to a "flipped"
1919
- // comparison and a negation, and the negation can be merged into the
1920
- // select. Therefore, it not necessarily any more expensive than an
1921
- // ordered ("normal") comparison. Whether these optimizations will be
1922
- // performed is ultimately up to the backend, but at least x86 does
1923
- // perform them.
1924
- let s0 = self . select ( less_or_nan, int_min, fptosui_result) ;
1925
- let s1 = self . select ( greater, int_max, s0) ;
1926
-
1927
- // Step 3: NaN replacement.
1928
- // For unsigned types, the above step already yielded int_ty::MIN == 0 if val is NaN.
1929
- // Therefore we only need to execute this step for signed integer types.
1912
+ let cast_res = func. new_local ( self . location , dest_ty, "fti_cast_res" ) ;
1930
1913
if signed {
1931
- // LLVM has no isNaN predicate, so we use (val == val) instead
1932
- let cmp = self . fcmp ( RealPredicate :: RealOEQ , val, val) ;
1933
- self . select ( cmp, s1, zero)
1914
+ // Create blocks
1915
+ let nan = func. new_block ( "nan" ) ;
1916
+ let not_nan = func. new_block ( "not_nan" ) ;
1917
+ let gt_min = func. new_block ( "gt_min" ) ;
1918
+ let in_bounds = func. new_block ( "in_bounds" ) ;
1919
+ let gt_max = func. new_block ( "gt_max" ) ;
1920
+ let lt_min = func. new_block ( "lt_min" ) ;
1921
+ let after_block = func. new_block ( "after_cast" ) ;
1922
+ // First, we check if the value is NaN. If it is, we jump away to the `nan` block.
1923
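+ // If it is not, we continue on to the `not_nan` block. NOTE(review): `RealOEQ val, val` is true when val is *not* NaN - confirm the branch targets below are not swapped.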
1924
+ let is_nan = self . fcmp ( RealPredicate :: RealOEQ , val, val) ;
1925
+ self . block . end_with_conditional ( self . location , is_nan, nan, not_nan) ;
1926
+ // If the value is NaN, assign 0 to cast_res, and jump to `after`.
1927
+ self . switch_to_block ( nan) ;
1928
+ self . block . add_assignment ( self . location , cast_res, zero) ;
1929
+ self . block . end_with_jump ( self . location , after_block) ;
1930
+ // The value is not NaN. Check if it is lower than the min end of our range.
1931
+ self . switch_to_block ( not_nan) ;
1932
+ self . block . end_with_conditional ( self . location , less_or_nan, lt_min, gt_min) ;
1933
+ // Value less than min - assign min to cast_res, jump to `after`.
1934
+ self . switch_to_block ( lt_min) ;
1935
+ self . block . add_assignment ( self . location , cast_res, int_min) ;
1936
+ self . block . end_with_jump ( self . location , after_block) ;
1937
+ // Value greater than min - check if it fits within the upper end of our range.
1938
+ self . switch_to_block ( gt_min) ;
1939
+ self . block . end_with_conditional ( self . location , greater, gt_max, in_bounds) ;
1940
+ // Value is greater than MAX - assign MAX to cast_res, jump to after.
1941
+ self . switch_to_block ( gt_max) ;
1942
+ self . block . add_assignment ( self . location , cast_res, int_max) ;
1943
+ self . block . end_with_jump ( self . location , after_block) ;
1944
+ // Value in range - we can safely cast.
1945
+ self . switch_to_block ( in_bounds) ;
1946
+ let fptosi_result = self . fptosi ( val, dest_ty) ;
1947
+ self . block . add_assignment ( self . location , cast_res, fptosi_result) ;
1948
+ self . block . end_with_jump ( self . location , after_block) ;
1949
+ // The final block - read `cast_res`, continue on our merry way :).
1950
+ self . switch_to_block ( after_block) ;
1951
+ return cast_res. to_rvalue ( ) ;
1934
1952
} else {
1935
- s1
1953
+ // Create blocks
1954
+ let lt_max = func. new_block ( "lt_max" ) ;
1955
+ let in_bounds = func. new_block ( "in_bounds" ) ;
1956
+ let gt_max = func. new_block ( "gt_max" ) ;
1957
+ let lt_min = func. new_block ( "lt_min" ) ;
1958
+ let after_block = func. new_block ( "after_cast" ) ;
1959
+ // We first start by checking if the value is greater than max. This order (opposite of signed)
1960
+ // will allow us to save a NaN check later down the line.
1961
+ let greater = self . fcmp ( RealPredicate :: RealOGT , val, f_max) ;
1962
+ self . block . end_with_conditional ( self . location , greater, gt_max, lt_max) ;
1963
+ // Value greater than max - just assign max, jump to after.
1964
+ self . switch_to_block ( gt_max) ;
1965
+ self . block . add_assignment ( self . location , cast_res, int_max) ;
1966
+ self . block . end_with_jump ( self . location , after_block) ;
1967
+ // Value not greater than max - either in range, less than min, or NaN.
1968
+ self . switch_to_block ( lt_max) ;
1969
+ let greater_than_zero = self . fcmp ( RealPredicate :: RealOGT , val, f_min) ;
1970
+ self . block . end_with_conditional ( self . location , greater_than_zero, in_bounds, lt_min) ;
1971
+ // Value less than min - assign min to cast_res, jump to `after`.
1972
+ self . switch_to_block ( lt_min) ;
1973
+ self . block . add_assignment ( self . location , cast_res, int_min) ;
1974
+ self . block . end_with_jump ( self . location , after_block) ;
1975
+ // Value in range - we can safely cast.
1976
+ self . switch_to_block ( in_bounds) ;
1977
+ let fptosi_result = self . fptosi ( val, dest_ty) ;
1978
+ self . block . add_assignment ( self . location , cast_res, fptosi_result) ;
1979
+ self . block . end_with_jump ( self . location , after_block) ;
1980
+ // The final block - read `cast_res`, continue on our merry way :).
1981
+ self . switch_to_block ( after_block) ;
1982
+ return cast_res. to_rvalue ( ) ;
1936
1983
}
1937
1984
}
1938
1985
0 commit comments