Skip to content

Commit 6a76d04

Browse files
authored
Improve the comments for hexadecimal float parsing. (bytecodealliance#1888)
* Improve the comments for hexadecimal float parsing. Replace the comments requesting better comments with better comments, and tidy up the code. * Fix a typo.
1 parent 5a7419d commit 6a76d04

File tree

1 file changed

+70
-35
lines changed

1 file changed

+70
-35
lines changed

crates/wast/src/token.rs

Lines changed: 70 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ macro_rules! float {
435435
let signif_bits = width - 1 - $exp_bits;
436436
let signif_mask = (1 << exp_offset) - 1;
437437
let bias = (1 << ($exp_bits - 1)) - 1;
438+
let msb = 1 << neg_offset;
438439

439440
let (hex, integral, fractional, exponent_str) = match val {
440441
// Infinity is when the exponent bits are all set and
@@ -497,53 +498,77 @@ macro_rules! float {
497498
return Some(float.to_bits());
498499
}
499500

500-
// Parsing hex floats is... hard! I don't really know what most of
501-
// this below does. It was copied from Gecko's implementation in
502-
// `WasmTextToBinary.cpp`. Would love comments on this if you have
503-
// them!
504-
let fractional = fractional.as_ref().map(|s| &**s).unwrap_or("");
505-
let negative = integral.starts_with('-');
501+
// Parse a hexadecimal floating-point value.
502+
//
503+
// The main loop here is simpler than for parsing decimal floats,
504+
// because we can just parse hexadecimal digits and then shift
505+
// their bits into place in the significand. But in addition to
506+
// that, we also need to handle non-normalized representations,
507+
// where the integral part is not "1", to convert them to
508+
// normalized results, to round, in case we get more digits than
509+
// the target format supports, and to handle overflow and subnormal
510+
// cases.
511+
512+
// Get slices of digits for the integral and fractional parts. We
513+
// can trivially skip any leading zeros in the integral part.
514+
let is_negative = integral.starts_with('-');
506515
let integral = integral.trim_start_matches('-').trim_start_matches('0');
516+
let fractional = fractional.as_ref().map(|s| &**s).unwrap_or("");
507517

508-
// Do a bunch of work up front to locate the first non-zero digit
509-
// to determine the initial exponent. There's a number of
510-
// adjustments depending on where the digit was found, but the
511-
// general idea here is that I'm not really sure why things are
512-
// calculated the way they are but it should match Gecko.
518+
// Locate the first non-zero digit to determine the initial exponent.
519+
//
520+
// If there's no integral part, skip past leading zeros so that
521+
// something like "0x.0000000000000000000002" doesn't cause us to hit
522+
// a shift overflow when we try to shift the value into place. We'll
523+
// adjust the exponent below to account for these skipped zeros.
513524
let fractional_no_leading = fractional.trim_start_matches('0');
514525
let fractional_iter = if integral.is_empty() {
515526
fractional_no_leading.chars()
516527
} else {
517528
fractional.chars()
518529
};
530+
531+
// Create a unified iterator over the digits of the integral part
532+
// followed by the digits of the fractional part. The boolean value
533+
// indicates which of these parts we're in.
519534
let mut digits = integral.chars()
520535
.map(|c| (to_hex(c) as $int, false))
521536
.chain(fractional_iter.map(|c| (to_hex(c) as $int, true)));
537+
538+
// Compute the number of leading zeros in the first non-zero digit,
539+
// since if the first digit is not "1" we'll need to adjust for
540+
// normalization.
522541
let lead_nonzero_digit = match digits.next() {
523542
Some((c, _)) => c,
524-
// No digits? Must be `+0` or `-0`, being careful to handle the
525-
// sign encoding here.
526-
None if negative => return Some(1 << (width - 1)),
543+
// No non-zero digits? Must be `+0` or `-0`, being careful to
544+
// handle the sign encoding here.
545+
None if is_negative => return Some(msb),
527546
None => return Some(0),
528547
};
529-
let mut significand = 0 as $int;
548+
let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4;
549+
550+
// Prepare for the main parsing loop. Calculate the initial values
551+
// of `exponent` and `significand` based on what we've seen so far.
530552
let mut exponent = if !integral.is_empty() {
531553
1
532554
} else {
555+
// Adjust the exponent digits to account for any leading zeros
556+
// in the fractional part that we skipped above.
533557
-((fractional.len() - fractional_no_leading.len() + 1) as i32) + 1
534558
};
535-
let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4;
536-
exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?;
537559
let mut significand_pos = (width - (4 - (lz as usize))) as isize;
538-
assert!(significand_pos >= 0);
539-
significand |= lead_nonzero_digit << significand_pos;
560+
let mut significand: $int = lead_nonzero_digit << significand_pos;
561+
let mut discarded_extra_nonzero = false;
562+
563+
assert!(significand_pos >= 0, "$int should be at least 4 bits wide");
564+
565+
// Adjust for leading zeros in the first digit.
566+
exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?;
540567

541568
// Now that we've got an anchor in the string we parse the remaining
542-
// digits. Again, not entirely sure why everything is the way it is
543-
// here! This is copied frmo gecko.
544-
let mut discarded_extra_nonzero = false;
545-
for (digit, is_fractional) in digits {
546-
if !is_fractional {
569+
// hexadecimal digits.
570+
for (digit, in_fractional) in digits {
571+
if !in_fractional {
547572
exponent += 4;
548573
}
549574
if significand_pos > -4 {
@@ -560,12 +585,19 @@ macro_rules! float {
560585
}
561586
}
562587

588+
debug_assert!(significand != 0, "The case of no non-zero digits should have been handled above");
589+
590+
// Parse the exponent string, which despite this being a hexadecimal
591+
// syntax, is a decimal number, and add it the exponent we've
592+
// computed from the potentially non-normalized significand.
563593
exponent = exponent.checked_add(match exponent_str {
564594
Some(s) => s.parse::<i32>().ok()?,
565595
None => 0,
566596
})?;
567-
debug_assert!(significand != 0);
568597

598+
// Encode the exponent and significand. Also calculate the bits of
599+
// the significand which are discarded, as we'll use them to
600+
// determine if we need to round up.
569601
let (encoded_exponent, encoded_significand, discarded_significand) =
570602
if exponent <= -bias {
571603
// Underflow to subnormal or zero.
@@ -598,26 +630,29 @@ macro_rules! float {
598630
)
599631
};
600632

633+
// Combine the encoded exponent and encoded significand to produce
634+
// the raw result, except for the sign bit, which we'll apply at
635+
// the end.
601636
let bits = encoded_exponent | encoded_significand;
602637

603-
// Apply rounding. If this overflows the significand, it carries
604-
// into the exponent bit according to the magic of the IEEE 754
605-
// encoding.
606-
//
607-
// Or rather, the comment above is what Gecko says so it's copied
608-
// here too.
609-
let msb = 1 << (width - 1);
638+
// Apply rounding. Do an integer add of `0` or `1` on the raw
639+
// result, depending on whether rounding is needed. Rounding can
640+
// lead to a floating-point overflow, but we don't need to
641+
// special-case that here because it turns out that IEEE 754 floats
642+
// are encoded such that when an integer add of `1` carries into
643+
// the bits of the exponent field, it produces the correct encoding
644+
// for infinity.
610645
let bits = bits
611646
+ (((discarded_significand & msb != 0)
612647
&& ((discarded_significand & !msb != 0) ||
613648
discarded_extra_nonzero ||
614649
// ties to even
615650
(encoded_significand & 1 != 0))) as $int);
616651

617-
// Just before we return the bits be sure to handle the sign bit we
652+
// Just before we return the bits, be sure to handle the sign bit we
618653
// found at the beginning.
619-
let bits = if negative {
620-
bits | (1 << (width - 1))
654+
let bits = if is_negative {
655+
bits | msb
621656
} else {
622657
bits
623658
};

0 commit comments

Comments
 (0)