Skip to content

Commit e945a85

Browse files
authored
Don't force deterministic NaN canonicalization (#260)
Resolves #247
1 parent 5a11e36 commit e945a85

File tree

4 files changed

+105
-62
lines changed

4 files changed

+105
-62
lines changed

design/mvp/CanonicalABI.md

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,8 @@ def load(cx, ptr, t):
436436
case S16() : return load_int(cx, ptr, 2, signed=True)
437437
case S32() : return load_int(cx, ptr, 4, signed=True)
438438
case S64() : return load_int(cx, ptr, 8, signed=True)
439-
case Float32() : return canonicalize32(reinterpret_i32_as_float(load_int(cx, ptr, 4)))
440-
case Float64() : return canonicalize64(reinterpret_i64_as_float(load_int(cx, ptr, 8)))
439+
case Float32() : return maybe_scramble_nan32(reinterpret_i32_as_float(load_int(cx, ptr, 4)))
440+
case Float64() : return maybe_scramble_nan64(reinterpret_i64_as_float(load_int(cx, ptr, 8)))
441441
case Char() : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
442442
case String() : return load_string(cx, ptr)
443443
case List(t) : return load_list(cx, ptr, t)
@@ -463,28 +463,51 @@ def convert_int_to_bool(i):
463463
return bool(i)
464464
```
465465

466-
For reasons [given](Explainer.md#type-definitions) in the explainer, floats are
467-
loaded from memory and then "canonicalized", mapping all Not-a-Number bit
468-
patterns to a single canonical `nan` value.
466+
Lifting and lowering float values may (from the component's perspective)
467+
non-deterministically modify the sign and payload bits of Not-A-Number (NaN)
468+
values, reflecting the practical reality that different languages, protocols
469+
and CPUs have different effects on NaNs. Although this non-determinism is
470+
expressed in the Python code below as generating a "random" NaN bit-pattern,
471+
native implementations do not need to literally generate a random bit-pattern;
472+
they may canonicalize to an arbitrary fixed NaN value. When a host implements
473+
the [deterministic profile], NaNs are canonicalized to a particular NaN
474+
bit-pattern (`CANONICAL_FLOAT32_NAN` or `CANONICAL_FLOAT64_NAN`, defined below).
469475
```python
470-
def reinterpret_i32_as_float(i):
471-
return struct.unpack('!f', struct.pack('!I', i))[0] # f32.reinterpret_i32
472-
473-
def reinterpret_i64_as_float(i):
474-
return struct.unpack('!d', struct.pack('!Q', i))[0] # f64.reinterpret_i64
475-
476+
DETERMINISTIC_PROFILE = False # or True
477+
THE_HOST_WANTS_TO = True # or False
476478
CANONICAL_FLOAT32_NAN = 0x7fc00000
477479
CANONICAL_FLOAT64_NAN = 0x7ff8000000000000
478480

479-
def canonicalize32(f):
481+
def maybe_scramble_nan32(f):
480482
if math.isnan(f):
481-
return reinterpret_i32_as_float(CANONICAL_FLOAT32_NAN)
483+
if DETERMINISTIC_PROFILE:
484+
f = reinterpret_i32_as_float(CANONICAL_FLOAT32_NAN)
485+
elif THE_HOST_WANTS_TO:
486+
f = reinterpret_i32_as_float(random_nan_bits(32, 8))
487+
assert(math.isnan(f))
482488
return f
483489

484-
def canonicalize64(f):
490+
def maybe_scramble_nan64(f):
485491
if math.isnan(f):
486-
return reinterpret_i64_as_float(CANONICAL_FLOAT64_NAN)
492+
if DETERMINISTIC_PROFILE:
493+
f = reinterpret_i64_as_float(CANONICAL_FLOAT64_NAN)
494+
elif THE_HOST_WANTS_TO:
495+
f = reinterpret_i64_as_float(random_nan_bits(64, 11))
496+
assert(math.isnan(f))
487497
return f
498+
499+
def reinterpret_i32_as_float(i):
500+
return struct.unpack('!f', struct.pack('!I', i))[0] # f32.reinterpret_i32
501+
502+
def reinterpret_i64_as_float(i):
503+
return struct.unpack('!d', struct.pack('!Q', i))[0] # f64.reinterpret_i64
504+
505+
def random_nan_bits(total_bits, exponent_bits):
506+
fraction_bits = total_bits - exponent_bits - 1
507+
bits = random.getrandbits(total_bits)
508+
bits |= ((1 << exponent_bits) - 1) << fraction_bits
509+
bits |= 1 << random.randrange(fraction_bits - 1)
510+
return bits
488511
```
489512

490513
An `i32` is converted to a `char` (a [Unicode Scalar Value]) by dynamically
@@ -674,8 +697,8 @@ def store(cx, v, t, ptr):
674697
case S16() : store_int(cx, v, ptr, 2, signed=True)
675698
case S32() : store_int(cx, v, ptr, 4, signed=True)
676699
case S64() : store_int(cx, v, ptr, 8, signed=True)
677-
case Float32() : store_int(cx, reinterpret_float_as_i32(canonicalize32(v)), ptr, 4)
678-
case Float64() : store_int(cx, reinterpret_float_as_i64(canonicalize64(v)), ptr, 8)
700+
case Float32() : store_int(cx, reinterpret_float_as_i32(maybe_scramble_nan32(v)), ptr, 4)
701+
case Float64() : store_int(cx, reinterpret_float_as_i64(maybe_scramble_nan64(v)), ptr, 8)
679702
case Char() : store_int(cx, char_to_i32(v), ptr, 4)
680703
case String() : store_string(cx, v, ptr)
681704
case List(t) : store_list(cx, v, ptr, t)
@@ -695,9 +718,8 @@ def store_int(cx, v, ptr, nbytes, signed = False):
695718
cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed=signed)
696719
```
697720

698-
Floats are stored directly into memory (in the case of NaNs, using the
699-
32-/64-bit canonical NaN bit pattern selected by
700-
`canonicalize32`/`canonicalize64`):
721+
Floats are stored directly into memory (after the NaN-scrambling described
722+
above):
701723
```python
702724
def reinterpret_float_as_i32(f):
703725
return struct.unpack('!I', struct.pack('!f', f))[0] # i32.reinterpret_f32
@@ -1153,8 +1175,8 @@ def lift_flat(cx, vi, t):
11531175
case S16() : return lift_flat_signed(vi, 32, 16)
11541176
case S32() : return lift_flat_signed(vi, 32, 32)
11551177
case S64() : return lift_flat_signed(vi, 64, 64)
1156-
case Float32() : return canonicalize32(vi.next('f32'))
1157-
case Float64() : return canonicalize64(vi.next('f64'))
1178+
case Float32() : return maybe_scramble_nan32(vi.next('f32'))
1179+
case Float64() : return maybe_scramble_nan64(vi.next('f64'))
11581180
case Char() : return convert_i32_to_char(cx, vi.next('i32'))
11591181
case String() : return lift_flat_string(cx, vi)
11601182
case List(t) : return lift_flat_list(cx, vi, t)
@@ -1277,8 +1299,8 @@ def lower_flat(cx, v, t):
12771299
case S16() : return lower_flat_signed(v, 32)
12781300
case S32() : return lower_flat_signed(v, 32)
12791301
case S64() : return lower_flat_signed(v, 64)
1280-
case Float32() : return [Value('f32', canonicalize32(v))]
1281-
case Float64() : return [Value('f64', canonicalize64(v))]
1302+
case Float32() : return [Value('f32', maybe_scramble_nan32(v))]
1303+
case Float64() : return [Value('f64', maybe_scramble_nan64(v))]
12821304
case Char() : return [Value('i32', char_to_i32(v))]
12831305
case String() : return lower_flat_string(cx, v)
12841306
case List(t) : return lower_flat_list(cx, v, t)
@@ -1656,6 +1678,7 @@ component instance defining a resource can access its representation.
16561678
[Multi-value]: https://github.com/WebAssembly/multi-value/blob/master/proposals/multi-value/Overview.md
16571679
[Exceptions]: https://github.com/WebAssembly/exception-handling/blob/main/proposals/exception-handling/Exceptions.md
16581680
[WASI]: https://github.com/webassembly/wasi
1681+
[Deterministic Profile]: https://github.com/WebAssembly/profiles/blob/main/proposals/profiles/Overview.md
16591682

16601683
[Alignment]: https://en.wikipedia.org/wiki/Data_structure_alignment
16611684
[UTF-8]: https://en.wikipedia.org/wiki/UTF-8

design/mvp/Explainer.md

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ sets of abstract values:
545545
| `bool` | `true` and `false` |
546546
| `s8`, `s16`, `s32`, `s64` | integers in the range [-2<sup>N-1</sup>, 2<sup>N-1</sup>-1] |
547547
| `u8`, `u16`, `u32`, `u64` | integers in the range [0, 2<sup>N</sup>-1] |
548-
| `float32`, `float64` | [IEEE754] floating-pointer numbers with a single, canonical "Not a Number" ([NaN]) value |
548+
| `float32`, `float64` | [IEEE754] floating-point numbers |
549549
| `char` | [Unicode Scalar Values] |
550550
| `record` | heterogeneous [tuples] of named values |
551551
| `variant` | heterogeneous [tagged unions] of named values |
@@ -560,15 +560,6 @@ For example, while abstract `variant`s contain a list of `case`s labelled by
560560
name, canonical lifting and lowering map each case to an `i32` value starting
561561
at `0`.
562562

563-
The `float32` and `float64` values have their NaNs canonicalized to a single
564-
value so that:
565-
1. consumers of NaN values are free to use the rest of the NaN payload for
566-
optimization purposes (like [NaN boxing]) without needing to worry about
567-
whether the NaN payload bits were significant; and
568-
2. producers of NaN values across component boundaries do not develop brittle
569-
assumptions that NaN payload bits are preserved by the other side (since
570-
they often aren't).
571-
572563
The `own` and `borrow` value types are both *handle types*. Handles logically
573564
contain the opaque address of a resource and avoid copying the resource when
574565
passed across component boundaries. By way of metaphor to operating systems,
@@ -1666,7 +1657,7 @@ At a high level, the additional coercions would be:
16661657
| `u8`, `u16`, `u32` | as a Number value | `ToUint8`, `ToUint16`, `ToUint32` |
16671658
| `s64` | as a BigInt value | `ToBigInt64` |
16681659
| `u64` | as a BigInt value | `ToBigUint64` |
1669-
| `float32`, `float64` | as a Number, mapping the canonical NaN to [JS NaN] | `ToNumber` mapping [JS NaN] to the canonical NaN |
1660+
| `float32`, `float64` | as a Number value | `ToNumber` |
16701661
| `char` | same as [`USVString`] | same as [`USVString`], throw if the USV length is not 1 |
16711662
| `record` | TBD: maybe a [JS Record]? | same as [`dictionary`] |
16721663
| `variant` | see below | see below |
@@ -1848,7 +1839,6 @@ and will be added over the coming months to complete the MVP proposal:
18481839
[`enum`]: https://webidl.spec.whatwg.org/#es-enumeration
18491840
[`T?`]: https://webidl.spec.whatwg.org/#es-nullable-type
18501841
[`Get`]: https://tc39.es/ecma262/#sec-get-o-p
1851-
[JS NaN]: https://tc39.es/ecma262/#sec-ecmascript-language-types-number-type
18521842
[Import Reflection]: https://github.com/tc39-transfer/proposal-import-reflection
18531843
[Module Record]: https://tc39.es/ecma262/#sec-abstract-module-records
18541844
[Module Specifier]: https://tc39.es/ecma262/multipage/ecmascript-language-scripts-and-modules.html#prod-ModuleSpecifier
@@ -1864,8 +1854,6 @@ and will be added over the coming months to complete the MVP proposal:
18641854
[Closure]: https://en.wikipedia.org/wiki/Closure_(computer_programming)
18651855
[Empty Type]: https://en.wikipedia.org/w/index.php?title=Empty_type
18661856
[IEEE754]: https://en.wikipedia.org/wiki/IEEE_754
1867-
[NaN]: https://en.wikipedia.org/wiki/NaN
1868-
[NaN Boxing]: https://wingolog.org/archives/2011/05/18/value-representation-in-javascript-implementations
18691857
[Unicode Scalar Values]: https://unicode.org/glossary/#unicode_scalar_value
18701858
[Tuples]: https://en.wikipedia.org/wiki/Tuple
18711859
[Tagged Unions]: https://en.wikipedia.org/wiki/Tagged_union

design/mvp/canonical-abi/definitions.py

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from __future__ import annotations
88
import math
99
import struct
10+
import random
1011
from dataclasses import dataclass
1112
from typing import Optional
1213
from typing import Callable
@@ -392,8 +393,8 @@ def load(cx, ptr, t):
392393
case S16() : return load_int(cx, ptr, 2, signed=True)
393394
case S32() : return load_int(cx, ptr, 4, signed=True)
394395
case S64() : return load_int(cx, ptr, 8, signed=True)
395-
case Float32() : return canonicalize32(reinterpret_i32_as_float(load_int(cx, ptr, 4)))
396-
case Float64() : return canonicalize64(reinterpret_i64_as_float(load_int(cx, ptr, 8)))
396+
case Float32() : return maybe_scramble_nan32(reinterpret_i32_as_float(load_int(cx, ptr, 4)))
397+
case Float64() : return maybe_scramble_nan64(reinterpret_i64_as_float(load_int(cx, ptr, 8)))
397398
case Char() : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
398399
case String() : return load_string(cx, ptr)
399400
case List(t) : return load_list(cx, ptr, t)
@@ -410,25 +411,42 @@ def convert_int_to_bool(i):
410411
assert(i >= 0)
411412
return bool(i)
412413

413-
def reinterpret_i32_as_float(i):
414-
return struct.unpack('!f', struct.pack('!I', i))[0] # f32.reinterpret_i32
415-
416-
def reinterpret_i64_as_float(i):
417-
return struct.unpack('!d', struct.pack('!Q', i))[0] # f64.reinterpret_i64
418-
414+
DETERMINISTIC_PROFILE = False # or True
415+
THE_HOST_WANTS_TO = True # or False
419416
CANONICAL_FLOAT32_NAN = 0x7fc00000
420417
CANONICAL_FLOAT64_NAN = 0x7ff8000000000000
421418

422-
def canonicalize32(f):
419+
def maybe_scramble_nan32(f):
423420
if math.isnan(f):
424-
return reinterpret_i32_as_float(CANONICAL_FLOAT32_NAN)
421+
if DETERMINISTIC_PROFILE:
422+
f = reinterpret_i32_as_float(CANONICAL_FLOAT32_NAN)
423+
elif THE_HOST_WANTS_TO:
424+
f = reinterpret_i32_as_float(random_nan_bits(32, 8))
425+
assert(math.isnan(f))
425426
return f
426427

427-
def canonicalize64(f):
428+
def maybe_scramble_nan64(f):
428429
if math.isnan(f):
429-
return reinterpret_i64_as_float(CANONICAL_FLOAT64_NAN)
430+
if DETERMINISTIC_PROFILE:
431+
f = reinterpret_i64_as_float(CANONICAL_FLOAT64_NAN)
432+
elif THE_HOST_WANTS_TO:
433+
f = reinterpret_i64_as_float(random_nan_bits(64, 11))
434+
assert(math.isnan(f))
430435
return f
431436

437+
def reinterpret_i32_as_float(i):
438+
return struct.unpack('!f', struct.pack('!I', i))[0] # f32.reinterpret_i32
439+
440+
def reinterpret_i64_as_float(i):
441+
return struct.unpack('!d', struct.pack('!Q', i))[0] # f64.reinterpret_i64
442+
443+
def random_nan_bits(total_bits, exponent_bits):
444+
fraction_bits = total_bits - exponent_bits - 1
445+
bits = random.getrandbits(total_bits)
446+
bits |= ((1 << exponent_bits) - 1) << fraction_bits
447+
bits |= 1 << random.randrange(fraction_bits - 1)
448+
return bits
449+
432450
def convert_i32_to_char(cx, i):
433451
trap_if(i >= 0x110000)
434452
trap_if(0xD800 <= i <= 0xDFFF)
@@ -554,8 +572,8 @@ def store(cx, v, t, ptr):
554572
case S16() : store_int(cx, v, ptr, 2, signed=True)
555573
case S32() : store_int(cx, v, ptr, 4, signed=True)
556574
case S64() : store_int(cx, v, ptr, 8, signed=True)
557-
case Float32() : store_int(cx, reinterpret_float_as_i32(canonicalize32(v)), ptr, 4)
558-
case Float64() : store_int(cx, reinterpret_float_as_i64(canonicalize64(v)), ptr, 8)
575+
case Float32() : store_int(cx, reinterpret_float_as_i32(maybe_scramble_nan32(v)), ptr, 4)
576+
case Float64() : store_int(cx, reinterpret_float_as_i64(maybe_scramble_nan64(v)), ptr, 8)
559577
case Char() : store_int(cx, char_to_i32(v), ptr, 4)
560578
case String() : store_string(cx, v, ptr)
561579
case List(t) : store_list(cx, v, ptr, t)
@@ -874,8 +892,8 @@ def lift_flat(cx, vi, t):
874892
case S16() : return lift_flat_signed(vi, 32, 16)
875893
case S32() : return lift_flat_signed(vi, 32, 32)
876894
case S64() : return lift_flat_signed(vi, 64, 64)
877-
case Float32() : return canonicalize32(vi.next('f32'))
878-
case Float64() : return canonicalize64(vi.next('f64'))
895+
case Float32() : return maybe_scramble_nan32(vi.next('f32'))
896+
case Float64() : return maybe_scramble_nan64(vi.next('f64'))
879897
case Char() : return convert_i32_to_char(cx, vi.next('i32'))
880898
case String() : return lift_flat_string(cx, vi)
881899
case List(t) : return lift_flat_list(cx, vi, t)
@@ -963,8 +981,8 @@ def lower_flat(cx, v, t):
963981
case S16() : return lower_flat_signed(v, 32)
964982
case S32() : return lower_flat_signed(v, 32)
965983
case S64() : return lower_flat_signed(v, 64)
966-
case Float32() : return [Value('f32', canonicalize32(v))]
967-
case Float64() : return [Value('f64', canonicalize64(v))]
984+
case Float32() : return [Value('f32', maybe_scramble_nan32(v))]
985+
case Float64() : return [Value('f64', maybe_scramble_nan64(v))]
968986
case Char() : return [Value('i32', char_to_i32(v))]
969987
case String() : return lower_flat_string(cx, v)
970988
case List(t) : return lower_flat_list(cx, v, t)

design/mvp/canonical-abi/run_tests.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,18 +154,32 @@ def test_pairs(t, pairs):
154154
test_pairs(Enum(['a','b']), [(0,{'a':None}), (1,{'b':None}), (2,None)])
155155

156156
def test_nan32(inbits, outbits):
157-
f = lift_flat(mk_cx(), ValueIter([Value('f32', reinterpret_i32_as_float(inbits))]), Float32())
158-
assert(reinterpret_float_as_i32(f) == outbits)
157+
origf = reinterpret_i32_as_float(inbits)
158+
f = lift_flat(mk_cx(), ValueIter([Value('f32', origf)]), Float32())
159+
if DETERMINISTIC_PROFILE:
160+
assert(reinterpret_float_as_i32(f) == outbits)
161+
else:
162+
assert(not math.isnan(origf) or math.isnan(f))
159163
cx = mk_cx(int.to_bytes(inbits, 4, 'little'))
160164
f = load(cx, 0, Float32())
161-
assert(reinterpret_float_as_i32(f) == outbits)
165+
if DETERMINISTIC_PROFILE:
166+
assert(reinterpret_float_as_i32(f) == outbits)
167+
else:
168+
assert(not math.isnan(origf) or math.isnan(f))
162169

163170
def test_nan64(inbits, outbits):
164-
f = lift_flat(mk_cx(), ValueIter([Value('f64', reinterpret_i64_as_float(inbits))]), Float64())
165-
assert(reinterpret_float_as_i64(f) == outbits)
171+
origf = reinterpret_i64_as_float(inbits)
172+
f = lift_flat(mk_cx(), ValueIter([Value('f64', origf)]), Float64())
173+
if DETERMINISTIC_PROFILE:
174+
assert(reinterpret_float_as_i64(f) == outbits)
175+
else:
176+
assert(not math.isnan(origf) or math.isnan(f))
166177
cx = mk_cx(int.to_bytes(inbits, 8, 'little'))
167178
f = load(cx, 0, Float64())
168-
assert(reinterpret_float_as_i64(f) == outbits)
179+
if DETERMINISTIC_PROFILE:
180+
assert(reinterpret_float_as_i64(f) == outbits)
181+
else:
182+
assert(not math.isnan(origf) or math.isnan(f))
169183

170184
test_nan32(0x7fc00000, CANONICAL_FLOAT32_NAN)
171185
test_nan32(0x7fc00001, CANONICAL_FLOAT32_NAN)

0 commit comments

Comments
 (0)