@@ -435,6 +435,7 @@ macro_rules! float {
435
435
let signif_bits = width - 1 - $exp_bits;
436
436
let signif_mask = ( 1 << exp_offset) - 1 ;
437
437
let bias = ( 1 << ( $exp_bits - 1 ) ) - 1 ;
438
+ let msb = 1 << neg_offset;
438
439
439
440
let ( hex, integral, fractional, exponent_str) = match val {
440
441
// Infinity is when the exponent bits are all set and
@@ -497,53 +498,77 @@ macro_rules! float {
497
498
return Some ( float. to_bits( ) ) ;
498
499
}
499
500
500
- // Parsing hex floats is... hard! I don't really know what most of
501
- // this below does. It was copied from Gecko's implementation in
502
- // `WasmTextToBinary.cpp`. Would love comments on this if you have
503
- // them!
504
- let fractional = fractional. as_ref( ) . map( |s| & * * s) . unwrap_or( "" ) ;
505
- let negative = integral. starts_with( '-' ) ;
501
+ // Parse a hexadecimal floating-point value.
502
+ //
503
+ // The main loop here is simpler than for parsing decimal floats,
504
+ // because we can just parse hexadecimal digits and then shift
505
+ // their bits into place in the significand. But in addition to
506
+ // that, we also need to handle non-normalized representations,
507
+ // where the integral part is not "1", to convert them to
508
+ // normalized results, to round, in case we get more digits than
509
+ // the target format supports, and to handle overflow and subnormal
510
+ // cases.
511
+
512
+ // Get slices of digits for the integral and fractional parts. We
513
+ // can trivially skip any leading zeros in the integral part.
514
+ let is_negative = integral. starts_with( '-' ) ;
506
515
let integral = integral. trim_start_matches( '-' ) . trim_start_matches( '0' ) ;
516
+ let fractional = fractional. as_ref( ) . map( |s| & * * s) . unwrap_or( "" ) ;
507
517
508
- // Do a bunch of work up front to locate the first non-zero digit
509
- // to determine the initial exponent. There's a number of
510
- // adjustments depending on where the digit was found, but the
511
- // general idea here is that I'm not really sure why things are
512
- // calculated the way they are but it should match Gecko.
518
+ // Locate the first non-zero digit to determine the initial exponent.
519
+ //
520
+ // If there's no integral part, skip past leading zeros so that
521
+ // something like "0x.0000000000000000000002" doesn't cause us to hit
522
+ // a shift overflow when we try to shift the value into place. We'll
523
+ // adjust the exponent below to account for these skipped zeros.
513
524
let fractional_no_leading = fractional. trim_start_matches( '0' ) ;
514
525
let fractional_iter = if integral. is_empty( ) {
515
526
fractional_no_leading. chars( )
516
527
} else {
517
528
fractional. chars( )
518
529
} ;
530
+
531
+ // Create a unified iterator over the digits of the integral part
532
+ // followed by the digits of the fractional part. The boolean value
533
+ // indicates which of these parts we're in.
519
534
let mut digits = integral. chars( )
520
535
. map( |c| ( to_hex( c) as $int, false ) )
521
536
. chain( fractional_iter. map( |c| ( to_hex( c) as $int, true ) ) ) ;
537
+
538
+ // Compute the number of leading zeros in the first non-zero digit,
539
+ // since if the first digit is not "1" we'll need to adjust for
540
+ // normalization.
522
541
let lead_nonzero_digit = match digits. next( ) {
523
542
Some ( ( c, _) ) => c,
524
- // No digits? Must be `+0` or `-0`, being careful to handle the
525
- // sign encoding here.
526
- None if negative => return Some ( 1 << ( width - 1 ) ) ,
543
+ // No non-zero digits? Must be `+0` or `-0`, being careful to
544
+ // handle the sign encoding here.
545
+ None if is_negative => return Some ( msb ) ,
527
546
None => return Some ( 0 ) ,
528
547
} ;
529
- let mut significand = 0 as $int;
548
+ let lz = ( lead_nonzero_digit as u8 ) . leading_zeros( ) as i32 - 4 ;
549
+
550
+ // Prepare for the main parsing loop. Calculate the initial values
551
+ // of `exponent` and `significand` based on what we've seen so far.
530
552
let mut exponent = if !integral. is_empty( ) {
531
553
1
532
554
} else {
555
+ // Adjust the exponent digits to account for any leading zeros
556
+ // in the fractional part that we skipped above.
533
557
-( ( fractional. len( ) - fractional_no_leading. len( ) + 1 ) as i32 ) + 1
534
558
} ;
535
- let lz = ( lead_nonzero_digit as u8 ) . leading_zeros( ) as i32 - 4 ;
536
- exponent = exponent. checked_mul( 4 ) ?. checked_sub( lz + 1 ) ?;
537
559
let mut significand_pos = ( width - ( 4 - ( lz as usize ) ) ) as isize ;
538
- assert!( significand_pos >= 0 ) ;
539
- significand |= lead_nonzero_digit << significand_pos;
560
+ let mut significand: $int = lead_nonzero_digit << significand_pos;
561
+ let mut discarded_extra_nonzero = false ;
562
+
563
+ assert!( significand_pos >= 0 , "$int should be at least 4 bits wide" ) ;
564
+
565
+ // Adjust for leading zeros in the first digit.
566
+ exponent = exponent. checked_mul( 4 ) ?. checked_sub( lz + 1 ) ?;
540
567
541
568
// Now that we've got an anchor in the string we parse the remaining
542
- // digits. Again, not entirely sure why everything is the way it is
543
- // here! This is copied frmo gecko.
544
- let mut discarded_extra_nonzero = false ;
545
- for ( digit, is_fractional) in digits {
546
- if !is_fractional {
569
+ // hexadecimal digits.
570
+ for ( digit, in_fractional) in digits {
571
+ if !in_fractional {
547
572
exponent += 4 ;
548
573
}
549
574
if significand_pos > -4 {
@@ -560,12 +585,19 @@ macro_rules! float {
560
585
}
561
586
}
562
587
588
+ debug_assert!( significand != 0 , "The case of no non-zero digits should have been handled above" ) ;
589
+
590
+ // Parse the exponent string, which despite this being a hexadecimal
591
+ // syntax, is a decimal number, and add it the exponent we've
592
+ // computed from the potentially non-normalized significand.
563
593
exponent = exponent. checked_add( match exponent_str {
564
594
Some ( s) => s. parse:: <i32 >( ) . ok( ) ?,
565
595
None => 0 ,
566
596
} ) ?;
567
- debug_assert!( significand != 0 ) ;
568
597
598
+ // Encode the exponent and significand. Also calculate the bits of
599
+ // the significand which are discarded, as we'll use them to
600
+ // determine if we need to round up.
569
601
let ( encoded_exponent, encoded_significand, discarded_significand) =
570
602
if exponent <= -bias {
571
603
// Underflow to subnormal or zero.
@@ -598,26 +630,29 @@ macro_rules! float {
598
630
)
599
631
} ;
600
632
633
+ // Combine the encoded exponent and encoded significand to produce
634
+ // the raw result, except for the sign bit, which we'll apply at
635
+ // the end.
601
636
let bits = encoded_exponent | encoded_significand;
602
637
603
- // Apply rounding. If this overflows the significand, it carries
604
- // into the exponent bit according to the magic of the IEEE 754
605
- // encoding.
606
- //
607
- // Or rather, the comment above is what Gecko says so it's copied
608
- // here too.
609
- let msb = 1 << ( width - 1 ) ;
638
+ // Apply rounding. Do an integer add of `0` or `1` on the raw
639
+ // result, depending on whether rounding is needed. Rounding can
640
+ // lead to a floating-point overflow, but we don't need to
641
+ // special-case that here because it turns out that IEEE 754 floats
642
+ // are encoded such that when an integer add of `1` carries into
643
+ // the bits of the exponent field, it produces the correct encoding
644
+ // for infinity.
610
645
let bits = bits
611
646
+ ( ( ( discarded_significand & msb != 0 )
612
647
&& ( ( discarded_significand & !msb != 0 ) ||
613
648
discarded_extra_nonzero ||
614
649
// ties to even
615
650
( encoded_significand & 1 != 0 ) ) ) as $int) ;
616
651
617
- // Just before we return the bits be sure to handle the sign bit we
652
+ // Just before we return the bits, be sure to handle the sign bit we
618
653
// found at the beginning.
619
- let bits = if negative {
620
- bits | ( 1 << ( width - 1 ) )
654
+ let bits = if is_negative {
655
+ bits | msb
621
656
} else {
622
657
bits
623
658
} ;
0 commit comments