Skip to content

Commit aba0746

Browse files
Perform NaN randomization as part of the interpretation of float bits (#279)
* Perform NaN randomization as part of the interpretation of float bits

  This removes the `THE_HOST_WANTS_TO` option, which appeared to give hosts permission to interpret NaN bits how they "want to". The change here is to say that all core-wasm NaN bitpatterns are interpreted as the same component-model NaN value. It's my understanding that the `random_nan_bits` function isn't meant to be the precise algorithm that nondeterministic-profile implementations must use, so this doesn't require hosts to do any new randomization work.

  This change also fixes what appears to be a bug: `lift_flat_variant`/`lower_flat_variant` were calling `reinterpret_i32_as_float`/`reinterpret_float_as_i32` without performing NaN scrambling. By making NaN scrambling be part of interpretation, we ensure that it's performed anywhere interpretation is performed.

* Remove now-redundant scramble calls.
* Canonicalize rather than scramble on lifting.
* Add comments explaining the NaN rules.
* Convert code comments into prose text. And reword to better fit it in with the rest of the prose.
* Rename `reinterpret_` functions to `encode_`/`decode_`.
* Update design/mvp/CanonicalABI.md

  Co-authored-by: Luke Wagner <mail@lukewagner.name>

---------

Co-authored-by: Luke Wagner <mail@lukewagner.name>
1 parent 29126b3 commit aba0746

File tree

3 files changed

+141
-85
lines changed

3 files changed

+141
-85
lines changed

design/mvp/CanonicalABI.md

Lines changed: 78 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -442,8 +442,8 @@ def load(cx, ptr, t):
442442
case S16() : return load_int(cx, ptr, 2, signed=True)
443443
case S32() : return load_int(cx, ptr, 4, signed=True)
444444
case S64() : return load_int(cx, ptr, 8, signed=True)
445-
case Float32() : return maybe_scramble_nan32(reinterpret_i32_as_float(load_int(cx, ptr, 4)))
446-
case Float64() : return maybe_scramble_nan64(reinterpret_i64_as_float(load_int(cx, ptr, 8)))
445+
case Float32() : return decode_i32_as_float(load_int(cx, ptr, 4))
446+
case Float64() : return decode_i64_as_float(load_int(cx, ptr, 8))
447447
case Char() : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
448448
case String() : return load_string(cx, ptr)
449449
case List(t) : return load_list(cx, ptr, t)
@@ -469,51 +469,42 @@ def convert_int_to_bool(i):
469469
return bool(i)
470470
```
471471

472-
Lifting and lowering float values may (from the component's perspective)
473-
non-deterministically modify the sign and payload bits of Not-A-Number (NaN)
474-
values, reflecting the practical reality that different languages, protocols
475-
and CPUs have different effects on NaNs. Although this non-determinism is
476-
expressed in the Python code below as generating a "random" NaN bit-pattern,
477-
native implementations do not need to literally generate a random bit-pattern;
478-
they may canonicalize to an arbitrary fixed NaN value. When a host implements
479-
the [deterministic profile], NaNs are canonicalized to a particular NaN
480-
bit-pattern.
472+
Floats are loaded directly from memory, with the sign and payload information
473+
of NaN values discarded. Consequently, there is only one unique NaN value per
474+
floating-point type. This reflects the practical reality that some languages
475+
and protocols do not preserve these bits. In the Python code below, this is
476+
expressed as canonicalizing NaNs to a particular bit pattern.
477+
478+
See the comments about lowering of float values for a discussion of possible
479+
optimizations.
481480
```python
482481
DETERMINISTIC_PROFILE = False # or True
483-
THE_HOST_WANTS_TO = True # or False
484482
CANONICAL_FLOAT32_NAN = 0x7fc00000
485483
CANONICAL_FLOAT64_NAN = 0x7ff8000000000000
486484

487-
def maybe_scramble_nan32(f):
485+
def canonicalize_nan32(f):
488486
if math.isnan(f):
489-
if DETERMINISTIC_PROFILE:
490-
f = reinterpret_i32_as_float(CANONICAL_FLOAT32_NAN)
491-
elif THE_HOST_WANTS_TO:
492-
f = reinterpret_i32_as_float(random_nan_bits(32, 8))
487+
f = core_f32_reinterpret_i32(CANONICAL_FLOAT32_NAN)
493488
assert(math.isnan(f))
494489
return f
495490

496-
def maybe_scramble_nan64(f):
491+
def canonicalize_nan64(f):
497492
if math.isnan(f):
498-
if DETERMINISTIC_PROFILE:
499-
f = reinterpret_i64_as_float(CANONICAL_FLOAT64_NAN)
500-
elif THE_HOST_WANTS_TO:
501-
f = reinterpret_i64_as_float(random_nan_bits(64, 11))
493+
f = core_f64_reinterpret_i64(CANONICAL_FLOAT64_NAN)
502494
assert(math.isnan(f))
503495
return f
504496

505-
def reinterpret_i32_as_float(i):
497+
def decode_i32_as_float(i):
498+
return canonicalize_nan32(core_f32_reinterpret_i32(i))
499+
500+
def decode_i64_as_float(i):
501+
return canonicalize_nan64(core_f64_reinterpret_i64(i))
502+
503+
def core_f32_reinterpret_i32(i):
506504
return struct.unpack('!f', struct.pack('!I', i))[0] # f32.reinterpret_i32
507505

508-
def reinterpret_i64_as_float(i):
506+
def core_f64_reinterpret_i64(i):
509507
return struct.unpack('!d', struct.pack('!Q', i))[0] # f64.reinterpret_i64
510-
511-
def random_nan_bits(total_bits, exponent_bits):
512-
fraction_bits = total_bits - exponent_bits - 1
513-
bits = random.getrandbits(total_bits)
514-
bits |= ((1 << exponent_bits) - 1) << fraction_bits
515-
bits |= 1 << random.randrange(fraction_bits - 1)
516-
return bits
517508
```
518509

519510
An `i32` is converted to a `char` (a [Unicode Scalar Value]) by dynamically
@@ -703,8 +694,8 @@ def store(cx, v, t, ptr):
703694
case S16() : store_int(cx, v, ptr, 2, signed=True)
704695
case S32() : store_int(cx, v, ptr, 4, signed=True)
705696
case S64() : store_int(cx, v, ptr, 8, signed=True)
706-
case Float32() : store_int(cx, reinterpret_float_as_i32(maybe_scramble_nan32(v)), ptr, 4)
707-
case Float64() : store_int(cx, reinterpret_float_as_i64(maybe_scramble_nan64(v)), ptr, 8)
697+
case Float32() : store_int(cx, encode_float_as_i32(v), ptr, 4)
698+
case Float64() : store_int(cx, encode_float_as_i64(v), ptr, 8)
708699
case Char() : store_int(cx, char_to_i32(v), ptr, 4)
709700
case String() : store_string(cx, v, ptr)
710701
case List(t) : store_list(cx, v, ptr, t)
@@ -724,13 +715,55 @@ def store_int(cx, v, ptr, nbytes, signed = False):
724715
cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed=signed)
725716
```
726717

727-
Floats are stored directly into memory (after the NaN-scrambling described
728-
above):
718+
Floats are stored directly into memory, with the sign and payload bits of NaN
719+
values modified non-deterministically. This reflects the practical reality that
720+
different languages, protocols and CPUs have different effects on NaNs.
721+
722+
Although this non-determinism is expressed in the Python code below as
723+
generating a "random" NaN bit-pattern, native implementations do not need to
724+
use the same "random" algorithm, or even any random algorithm at all. Hosts
725+
may instead choose to canonicalize to an arbitrary fixed NaN value, or even to
726+
the original value of the NaN before lifting, allowing them to optimize away
727+
both the canonicalization of lifting and the randomization of lowering.
728+
729+
When a host implements the [deterministic profile], NaNs are canonicalized to
730+
a particular NaN bit-pattern.
729731
```python
730-
def reinterpret_float_as_i32(f):
732+
def maybe_scramble_nan32(f):
733+
if math.isnan(f):
734+
if DETERMINISTIC_PROFILE:
735+
f = core_f32_reinterpret_i32(CANONICAL_FLOAT32_NAN)
736+
else:
737+
f = core_f32_reinterpret_i32(random_nan_bits(32, 8))
738+
assert(math.isnan(f))
739+
return f
740+
741+
def maybe_scramble_nan64(f):
742+
if math.isnan(f):
743+
if DETERMINISTIC_PROFILE:
744+
f = core_f64_reinterpret_i64(CANONICAL_FLOAT64_NAN)
745+
else:
746+
f = core_f64_reinterpret_i64(random_nan_bits(64, 11))
747+
assert(math.isnan(f))
748+
return f
749+
750+
def random_nan_bits(total_bits, exponent_bits):
751+
fraction_bits = total_bits - exponent_bits - 1
752+
bits = random.getrandbits(total_bits)
753+
bits |= ((1 << exponent_bits) - 1) << fraction_bits
754+
bits |= 1 << random.randrange(fraction_bits - 1)
755+
return bits
756+
757+
def encode_float_as_i32(f):
758+
return core_i32_reinterpret_f32(maybe_scramble_nan32(f))
759+
760+
def encode_float_as_i64(f):
761+
return core_i64_reinterpret_f64(maybe_scramble_nan64(f))
762+
763+
def core_i32_reinterpret_f32(f):
731764
return struct.unpack('!I', struct.pack('!f', f))[0] # i32.reinterpret_f32
732765

733-
def reinterpret_float_as_i64(f):
766+
def core_i64_reinterpret_f64(f):
734767
return struct.unpack('!Q', struct.pack('!d', f))[0] # i64.reinterpret_f64
735768
```
736769

@@ -1181,8 +1214,8 @@ def lift_flat(cx, vi, t):
11811214
case S16() : return lift_flat_signed(vi, 32, 16)
11821215
case S32() : return lift_flat_signed(vi, 32, 32)
11831216
case S64() : return lift_flat_signed(vi, 64, 64)
1184-
case Float32() : return maybe_scramble_nan32(vi.next('f32'))
1185-
case Float64() : return maybe_scramble_nan64(vi.next('f64'))
1217+
case Float32() : return canonicalize_nan32(vi.next('f32'))
1218+
case Float64() : return canonicalize_nan64(vi.next('f64'))
11861219
case Char() : return convert_i32_to_char(cx, vi.next('i32'))
11871220
case String() : return lift_flat_string(cx, vi)
11881221
case List(t) : return lift_flat_list(cx, vi, t)
@@ -1256,10 +1289,10 @@ def lift_flat_variant(cx, vi, cases):
12561289
have = flat_types.pop(0)
12571290
x = vi.next(have)
12581291
match (have, want):
1259-
case ('i32', 'f32') : return reinterpret_i32_as_float(x)
1292+
case ('i32', 'f32') : return decode_i32_as_float(x)
12601293
case ('i64', 'i32') : return wrap_i64_to_i32(x)
1261-
case ('i64', 'f32') : return reinterpret_i32_as_float(wrap_i64_to_i32(x))
1262-
case ('i64', 'f64') : return reinterpret_i64_as_float(x)
1294+
case ('i64', 'f32') : return decode_i32_as_float(wrap_i64_to_i32(x))
1295+
case ('i64', 'f64') : return decode_i64_as_float(x)
12631296
case _ : return x
12641297
c = cases[case_index]
12651298
if c.t is None:
@@ -1367,10 +1400,10 @@ def lower_flat_variant(cx, v, cases):
13671400
for i,have in enumerate(payload):
13681401
want = flat_types.pop(0)
13691402
match (have.t, want):
1370-
case ('f32', 'i32') : payload[i] = Value('i32', reinterpret_float_as_i32(have.v))
1403+
case ('f32', 'i32') : payload[i] = Value('i32', encode_float_as_i32(have.v))
13711404
case ('i32', 'i64') : payload[i] = Value('i64', have.v)
1372-
case ('f32', 'i64') : payload[i] = Value('i64', reinterpret_float_as_i32(have.v))
1373-
case ('f64', 'i64') : payload[i] = Value('i64', reinterpret_float_as_i64(have.v))
1405+
case ('f32', 'i64') : payload[i] = Value('i64', encode_float_as_i32(have.v))
1406+
case ('f64', 'i64') : payload[i] = Value('i64', encode_float_as_i64(have.v))
13741407
case _ : pass
13751408
for want in flat_types:
13761409
payload.append(Value(want, 0))

design/mvp/canonical-abi/definitions.py

Lines changed: 57 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,8 @@ def load(cx, ptr, t):
394394
case S16() : return load_int(cx, ptr, 2, signed=True)
395395
case S32() : return load_int(cx, ptr, 4, signed=True)
396396
case S64() : return load_int(cx, ptr, 8, signed=True)
397-
case Float32() : return maybe_scramble_nan32(reinterpret_i32_as_float(load_int(cx, ptr, 4)))
398-
case Float64() : return maybe_scramble_nan64(reinterpret_i64_as_float(load_int(cx, ptr, 8)))
397+
case Float32() : return decode_i32_as_float(load_int(cx, ptr, 4))
398+
case Float64() : return decode_i64_as_float(load_int(cx, ptr, 8))
399399
case Char() : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
400400
case String() : return load_string(cx, ptr)
401401
case List(t) : return load_list(cx, ptr, t)
@@ -413,41 +413,33 @@ def convert_int_to_bool(i):
413413
return bool(i)
414414

415415
DETERMINISTIC_PROFILE = False # or True
416-
THE_HOST_WANTS_TO = True # or False
417416
CANONICAL_FLOAT32_NAN = 0x7fc00000
418417
CANONICAL_FLOAT64_NAN = 0x7ff8000000000000
419418

420-
def maybe_scramble_nan32(f):
419+
def canonicalize_nan32(f):
421420
if math.isnan(f):
422-
if DETERMINISTIC_PROFILE:
423-
f = reinterpret_i32_as_float(CANONICAL_FLOAT32_NAN)
424-
elif THE_HOST_WANTS_TO:
425-
f = reinterpret_i32_as_float(random_nan_bits(32, 8))
421+
f = core_f32_reinterpret_i32(CANONICAL_FLOAT32_NAN)
426422
assert(math.isnan(f))
427423
return f
428424

429-
def maybe_scramble_nan64(f):
425+
def canonicalize_nan64(f):
430426
if math.isnan(f):
431-
if DETERMINISTIC_PROFILE:
432-
f = reinterpret_i64_as_float(CANONICAL_FLOAT64_NAN)
433-
elif THE_HOST_WANTS_TO:
434-
f = reinterpret_i64_as_float(random_nan_bits(64, 11))
427+
f = core_f64_reinterpret_i64(CANONICAL_FLOAT64_NAN)
435428
assert(math.isnan(f))
436429
return f
437430

438-
def reinterpret_i32_as_float(i):
431+
def decode_i32_as_float(i):
432+
return canonicalize_nan32(core_f32_reinterpret_i32(i))
433+
434+
def decode_i64_as_float(i):
435+
return canonicalize_nan64(core_f64_reinterpret_i64(i))
436+
437+
def core_f32_reinterpret_i32(i):
439438
return struct.unpack('!f', struct.pack('!I', i))[0] # f32.reinterpret_i32
440439

441-
def reinterpret_i64_as_float(i):
440+
def core_f64_reinterpret_i64(i):
442441
return struct.unpack('!d', struct.pack('!Q', i))[0] # f64.reinterpret_i64
443442

444-
def random_nan_bits(total_bits, exponent_bits):
445-
fraction_bits = total_bits - exponent_bits - 1
446-
bits = random.getrandbits(total_bits)
447-
bits |= ((1 << exponent_bits) - 1) << fraction_bits
448-
bits |= 1 << random.randrange(fraction_bits - 1)
449-
return bits
450-
451443
def convert_i32_to_char(cx, i):
452444
trap_if(i >= 0x110000)
453445
trap_if(0xD800 <= i <= 0xDFFF)
@@ -573,8 +565,8 @@ def store(cx, v, t, ptr):
573565
case S16() : store_int(cx, v, ptr, 2, signed=True)
574566
case S32() : store_int(cx, v, ptr, 4, signed=True)
575567
case S64() : store_int(cx, v, ptr, 8, signed=True)
576-
case Float32() : store_int(cx, reinterpret_float_as_i32(maybe_scramble_nan32(v)), ptr, 4)
577-
case Float64() : store_int(cx, reinterpret_float_as_i64(maybe_scramble_nan64(v)), ptr, 8)
568+
case Float32() : store_int(cx, encode_float_as_i32(v), ptr, 4)
569+
case Float64() : store_int(cx, encode_float_as_i64(v), ptr, 8)
578570
case Char() : store_int(cx, char_to_i32(v), ptr, 4)
579571
case String() : store_string(cx, v, ptr)
580572
case List(t) : store_list(cx, v, ptr, t)
@@ -587,10 +579,41 @@ def store(cx, v, t, ptr):
587579
def store_int(cx, v, ptr, nbytes, signed = False):
588580
cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed=signed)
589581

590-
def reinterpret_float_as_i32(f):
582+
def maybe_scramble_nan32(f):
583+
if math.isnan(f):
584+
if DETERMINISTIC_PROFILE:
585+
f = core_f32_reinterpret_i32(CANONICAL_FLOAT32_NAN)
586+
else:
587+
f = core_f32_reinterpret_i32(random_nan_bits(32, 8))
588+
assert(math.isnan(f))
589+
return f
590+
591+
def maybe_scramble_nan64(f):
592+
if math.isnan(f):
593+
if DETERMINISTIC_PROFILE:
594+
f = core_f64_reinterpret_i64(CANONICAL_FLOAT64_NAN)
595+
else:
596+
f = core_f64_reinterpret_i64(random_nan_bits(64, 11))
597+
assert(math.isnan(f))
598+
return f
599+
600+
def random_nan_bits(total_bits, exponent_bits):
601+
fraction_bits = total_bits - exponent_bits - 1
602+
bits = random.getrandbits(total_bits)
603+
bits |= ((1 << exponent_bits) - 1) << fraction_bits
604+
bits |= 1 << random.randrange(fraction_bits - 1)
605+
return bits
606+
607+
def encode_float_as_i32(f):
608+
return core_i32_reinterpret_f32(maybe_scramble_nan32(f))
609+
610+
def encode_float_as_i64(f):
611+
return core_i64_reinterpret_f64(maybe_scramble_nan64(f))
612+
613+
def core_i32_reinterpret_f32(f):
591614
return struct.unpack('!I', struct.pack('!f', f))[0] # i32.reinterpret_f32
592615

593-
def reinterpret_float_as_i64(f):
616+
def core_i64_reinterpret_f64(f):
594617
return struct.unpack('!Q', struct.pack('!d', f))[0] # i64.reinterpret_f64
595618

596619
def char_to_i32(c):
@@ -893,8 +916,8 @@ def lift_flat(cx, vi, t):
893916
case S16() : return lift_flat_signed(vi, 32, 16)
894917
case S32() : return lift_flat_signed(vi, 32, 32)
895918
case S64() : return lift_flat_signed(vi, 64, 64)
896-
case Float32() : return maybe_scramble_nan32(vi.next('f32'))
897-
case Float64() : return maybe_scramble_nan64(vi.next('f64'))
919+
case Float32() : return canonicalize_nan32(vi.next('f32'))
920+
case Float64() : return canonicalize_nan64(vi.next('f64'))
898921
case Char() : return convert_i32_to_char(cx, vi.next('i32'))
899922
case String() : return lift_flat_string(cx, vi)
900923
case List(t) : return lift_flat_list(cx, vi, t)
@@ -943,10 +966,10 @@ def next(self, want):
943966
have = flat_types.pop(0)
944967
x = vi.next(have)
945968
match (have, want):
946-
case ('i32', 'f32') : return reinterpret_i32_as_float(x)
969+
case ('i32', 'f32') : return decode_i32_as_float(x)
947970
case ('i64', 'i32') : return wrap_i64_to_i32(x)
948-
case ('i64', 'f32') : return reinterpret_i32_as_float(wrap_i64_to_i32(x))
949-
case ('i64', 'f64') : return reinterpret_i64_as_float(x)
971+
case ('i64', 'f32') : return decode_i32_as_float(wrap_i64_to_i32(x))
972+
case ('i64', 'f64') : return decode_i64_as_float(x)
950973
case _ : return x
951974
c = cases[case_index]
952975
if c.t is None:
@@ -1024,10 +1047,10 @@ def lower_flat_variant(cx, v, cases):
10241047
for i,have in enumerate(payload):
10251048
want = flat_types.pop(0)
10261049
match (have.t, want):
1027-
case ('f32', 'i32') : payload[i] = Value('i32', reinterpret_float_as_i32(have.v))
1050+
case ('f32', 'i32') : payload[i] = Value('i32', encode_float_as_i32(have.v))
10281051
case ('i32', 'i64') : payload[i] = Value('i64', have.v)
1029-
case ('f32', 'i64') : payload[i] = Value('i64', reinterpret_float_as_i32(have.v))
1030-
case ('f64', 'i64') : payload[i] = Value('i64', reinterpret_float_as_i64(have.v))
1052+
case ('f32', 'i64') : payload[i] = Value('i64', encode_float_as_i32(have.v))
1053+
case ('f64', 'i64') : payload[i] = Value('i64', encode_float_as_i64(have.v))
10311054
case _ : pass
10321055
for want in flat_types:
10331056
payload.append(Value(want, 0))

design/mvp/canonical-abi/run_tests.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,30 +154,30 @@ def test_pairs(t, pairs):
154154
test_pairs(Enum(['a','b']), [(0,{'a':None}), (1,{'b':None}), (2,None)])
155155

156156
def test_nan32(inbits, outbits):
157-
origf = reinterpret_i32_as_float(inbits)
157+
origf = decode_i32_as_float(inbits)
158158
f = lift_flat(mk_cx(), ValueIter([Value('f32', origf)]), Float32())
159159
if DETERMINISTIC_PROFILE:
160-
assert(reinterpret_float_as_i32(f) == outbits)
160+
assert(encode_float_as_i32(f) == outbits)
161161
else:
162162
assert(not math.isnan(origf) or math.isnan(f))
163163
cx = mk_cx(int.to_bytes(inbits, 4, 'little'))
164164
f = load(cx, 0, Float32())
165165
if DETERMINISTIC_PROFILE:
166-
assert(reinterpret_float_as_i32(f) == outbits)
166+
assert(encode_float_as_i32(f) == outbits)
167167
else:
168168
assert(not math.isnan(origf) or math.isnan(f))
169169

170170
def test_nan64(inbits, outbits):
171-
origf = reinterpret_i64_as_float(inbits)
171+
origf = decode_i64_as_float(inbits)
172172
f = lift_flat(mk_cx(), ValueIter([Value('f64', origf)]), Float64())
173173
if DETERMINISTIC_PROFILE:
174-
assert(reinterpret_float_as_i64(f) == outbits)
174+
assert(encode_float_as_i64(f) == outbits)
175175
else:
176176
assert(not math.isnan(origf) or math.isnan(f))
177177
cx = mk_cx(int.to_bytes(inbits, 8, 'little'))
178178
f = load(cx, 0, Float64())
179179
if DETERMINISTIC_PROFILE:
180-
assert(reinterpret_float_as_i64(f) == outbits)
180+
assert(encode_float_as_i64(f) == outbits)
181181
else:
182182
assert(not math.isnan(origf) or math.isnan(f))
183183

0 commit comments

Comments
 (0)