From 0beaf03fe299c03445b23c2e2f52907305ce42ec Mon Sep 17 00:00:00 2001 From: unknown <71151164+ZERICO2005@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:00:02 -0600 Subject: [PATCH] Implemented dtof in assembly --- src/crt/dtof.src | 242 ++++++++++++++++-- .../float64_to_float32/src/f64_to_f32_LUT.h | 14 +- .../float64_to_float32/src/main.c | 161 ++++++++++++ 3 files changed, 397 insertions(+), 20 deletions(-) diff --git a/src/crt/dtof.src b/src/crt/dtof.src index 1dac34205..53e663032 100644 --- a/src/crt/dtof.src +++ b/src/crt/dtof.src @@ -1,18 +1,224 @@ - assume adl=1 - - section .text - - public __dtof - -__dtof: - ; f64_ret_f32 - push af, iy, bc, de, hl - call ___f64_to_f32 - pop af - ld a, e - pop de - ld e, a - pop bc, iy, af - ret - - extern ___f64_to_f32 + assume adl=1 + + section .text + + public __dtof + + private __dtof_helper +__dtof_helper: + ; Moving this block of code to be behind __dtof ensures that + ; __dtof.ret_copysign can always be reached by jr in all paths. +.overflow: + ; carry is set here + pop hl + ; A = $10 + add a, c ; attempts to overflow the low 4 bits of the exponent + rl b ; (0x7F << 1) | 1 if the input is inf/NaN + inc b ; B will only be zero if the input was inf/NaN + jr nz, .not_inf_nan + + ; carry is cleared + adc hl, hl + jr nz, .has_payload + ld a, e + rla + and a, $3F + jr z, .no_payload +.has_payload: + set 5, e ; ensure that NaN stays NaN +.no_payload: + ld a, c + push de + pop bc + ld l, 5 + call __lshru + push bc + pop hl +.finish_inf_nan: + ld a, $7F + jr __dtof.ret_copysign +.not_inf_nan: + ; return infinity + ld hl, $800000 + jr .finish_inf_nan + +; Convert BC:UDE:UHL F64 to E:UHL F32 +; Rounding: round to nearest with ties to even +; Behaviour: +; Underflow: Returns signed zero. No signals raised. +; Subnormal: No signals raised. +; Rounded to Infinity: No signals raised. +; Overflow: Returns signed infinity. No signals raised. +; Signaling NaN: Quiet bit preserved. No signals raised. +; Quiet NaN: Quiet bit preserved. No signals raised. +; NaN Payloads: Copies the most significant payload bits. The LSB of mantissa is set if payload bits were discarded/truncated out. +__dtof: + bit 7, b + push af ; preserve A and signbit + push bc + push de + push hl + ; clear UBC + inc bc + dec.s bc + res 7, b + ld hl, -$3810 + add hl, bc + jr nc, .maybe_subnormal + ld hl, -$47F0 ; $FFB810 + ld a, l ; ld a, $10 + add hl, bc + jr c, __dtof_helper.overflow + ; result is normal or rounds to infinity + ; calculate new exponent + ; we only need the low 8 bits of the exponent + add hl, hl + add hl, hl + add hl, hl + add hl, hl + ; offset = -$380 - -$47F = $FF = -1 ; therefore decrement + dec h ; store new exponent + ld l, 29 ; f64_mant_bits - f32_mant_bits = 52 - 23 = 29 + ex (sp), hl ; (SP) = exponent/shift, HL = lo24 + + ; clear exponent + dec a ; ld a, $0F + and a, c + ld c, a + xor a, a + ld b, a + ; test round bit + bit 4, e + jr z, .round_down + ; test guard bit + ld a, e + and a, $20 + jr nz, .round_up + ; test sticky bits + inc a ; make A non-zero + adc hl, hl + jr nz, .round_up + ld a, e + rla + and a, $1F +.round_up: +.round_down: + call __llshru + or a, a + jr z, .no_round + inc hl ; does not overflow +.no_round: + pop af ; a = exponent, flags = 29 = ---5H3V-C + or a, a + rra + jr nc, .even_exponent + ld bc, $800000 + add hl, bc ; the result might be rounded to infinity here + adc a, c ; adc a, 0 ; wont overflow +.even_exponent: +.subnormal_no_round: +.ret_copysign: + pop de + ld e, a + pop bc + pop af + ret z + set 7, e + ret + +.ret_zero: + ; carry is cleared + pop hl + xor a, a + sbc hl, hl + jr .ret_copysign + +.maybe_subnormal: + ld hl, -$3690 + add hl, bc + jr nc, .ret_zero + ; calculate shift + ; A = (uint8_t)((BC - $3690) >> 4) + ; A = (uint8_t)((HL << 4) >> 8) + add hl, hl + add hl, hl + add hl, hl + add hl, hl + ld a, h + ; Shift = -A + 4 + 24 + cpl + add a, (4 + 24) + 1 ; (4 + 24) + CPL trick + ; maximum shift = 24 + 4 + 25 = 24 + 29 = 53 + ; minimum shift = 24 + 4 + 1 = 24 + 5 = 29 + ld b, a + ld e, a ; store shift amount + xor a, a + ; calculate sticky bits + ld hl, 1 +.shift_loop: + add hl, hl + rla + djnz .shift_loop + ; carry won't be set + ; set C:UDE to A:UHL + ; shift by an additional 24 bits + dec hl + or a, a + jr z, .the_set_bit_is_in_hl + dec a +.the_set_bit_is_in_hl: + ld c, a + ld a, e ; restore shift amount + ex de, hl + scf + sbc hl, hl + ; BC:UDE:UHL = 1 << shift + ; (SP) = X + call __lland + ; test if BC:UDE:UHL is zero + ; UBC must be zero for this to work + add hl, de ; carry may be set + adc hl, bc ; wont overflow + pop hl + ; DE and BC are swapped here + pop bc + pop de + push de + push bc + + ; clear exponent and include the implicit mantissa bit + ld d, 0 + jr z, .no_sticky_bits + inc d +.no_sticky_bits: + + ld l, a ; L = shift + ld a, e + and a, $0F + or a, $10 + + call __lshru + xor a, a ; subnormal exponent + ; HL = BC >> 1 + scf + sbc hl, hl ; ld hl, -1 + add hl, sp + push bc + srl (hl) + pop hl + rr h + rr l ; round bit shifted out + + jr nc, .subnormal_no_round + dec d + jr z, .subnormal_round_up + bit 0, l + jr z, .subnormal_no_round +.subnormal_round_up: + inc hl ; wont overflow, but may become FLT_MIN +; .subnormal_no_round: + jr .ret_copysign + + extern __lland + extern __llshru + extern __lshru diff --git a/test/floating_point/float64_to_float32/src/f64_to_f32_LUT.h b/test/floating_point/float64_to_float32/src/f64_to_f32_LUT.h index 904062339..5935844cd 100644 --- a/test/floating_point/float64_to_float32/src/f64_to_f32_LUT.h +++ b/test/floating_point/float64_to_float32/src/f64_to_f32_LUT.h @@ -9,7 +9,7 @@ typedef uint64_t input_type; typedef uint32_t output_type; -static const input_type f64_to_f32_LUT_input[256] = { +static const input_type f64_to_f32_LUT_input[260] = { /* 0 */ UINT64_C(0x0000000000000000), /* 1 */ UINT64_C(0x0000000000000001), /* 2 */ UINT64_C(0x0010000000000000), @@ -266,9 +266,14 @@ static const input_type f64_to_f32_LUT_input[256] = { /* 253 */ UINT64_C(0xD22D38D57ABF3991), /* 254 */ UINT64_C(0xA86498F2933913FB), /* 255 */ UINT64_C(0x4841C1F00831E908), +/* bonus edge cases */ +/* 256 */ UINT64_C(0x369F82B925D1BFBA), +/* 257 */ UINT64_C(0xB76634D97D4F585C), +/* 258 */ UINT64_C(0x36DD000000000000), +/* 259 */ UINT64_C(0xB80E0000A0000000), }; -const output_type f64_to_f32_LUT_output[256] = { +const output_type f64_to_f32_LUT_output[260] = { /* 0 */ UINT32_C(0x00000000), /* 1 */ UINT32_C(0x00000000), /* 2 */ UINT32_C(0x00000000), @@ -525,6 +530,11 @@ const output_type f64_to_f32_LUT_output[256] = { /* 253 */ UINT32_C(0xFF800000), /* 254 */ UINT32_C(0x80000000), /* 255 */ UINT32_C(0x7F800000), +/* bonus edge cases */ +/* 256 */ UINT32_C(0x00000001), +/* 257 */ UINT32_C(0x80001635), +/* 258 */ UINT32_C(0x0000000E), +/* 259 */ UINT32_C(0x80780002), }; #endif /* F64_TO_F32_LUT_H */ diff --git a/test/floating_point/float64_to_float32/src/main.c b/test/floating_point/float64_to_float32/src/main.c index 4cf893fd2..085c7a609 100644 --- a/test/floating_point/float64_to_float32/src/main.c +++ b/test/floating_point/float64_to_float32/src/main.c @@ -18,6 +18,165 @@ typedef union F32_pun { uint32_t bin; } F32_pun; +typedef union F64_pun { + long double flt; + uint64_t bin; +} F64_pun; + +/* debugging code */ +#if 0 + +void print_failed(long double input, uint32_t guess, uint32_t truth) { + F64_pun value; + value.flt = input; + printf( + "I: %016llX -->\nG: %08lX != %08lX\n", + value.bin, guess, truth + ); +} + +typedef union Float32_Bitwise { + float flt_part; + uint32_t u32_part; +} Float32_Bitwise; + +typedef union Float64_Bitwise { + long double flt_part; + uint64_t u64_part; +} Float64_Bitwise; + +uint32_t _dtof_c(uint64_t x) { + uint64_t mant = x & ((UINT64_C(1) << 52) - 1); + const uint16_t biased = (x >> 52) & 0x7FF; + const uint16_t BC = (uint16_t)((x >> 48) & 0x7FFF); + // 879 == 0x369 + if (BC < 16 * (1023 - (126 + 24))) { + return 0; + } + // 897 == 0x381 + if (BC < 16 * (1023 - 126)) { + uint64_t norm = mant | (UINT64_C(1) << 52); + uint8_t sub_expon = -(uint8_t)((BC - 16 * (1023 - 126)) >> 4); + const bool sticky = norm & ((UINT64_C(1) << (sub_expon + (4 + 24))) - 1); + norm >>= sub_expon + (4 + 24); + const bool round = norm & 1; + const bool guard = norm & 2; + norm >>= 1; + + uint32_t ret = (uint32_t)norm; + if (round && (guard || sticky)) { + // round up to even + ret++; + } + return ret; + } + // 1151 == 0x47F + if (biased >= 128 + 1023) { + #if 1 + if (biased == 0x7FF) { + uint32_t ret = (uint32_t)(mant >> (5 + 24)) & UINT32_C(0x007FFFFF); + #if 1 + /* Preserve NaN */ + bool low_nan = mant & ((UINT32_C(1) << (5 + 24)) - 1); + if (low_nan) { + ret |= 1; + } + #else + /* Signal NaN */ + ret |= (UINT32_C(1) << 22); + #endif + ret |= UINT32_C(0x7F800000); + return ret; + } + #endif + return UINT32_C(0x7F800000); + } + // result is normal or rounds to infinity + const bool sticky = mant & ((UINT64_C(1) << (4 + 24)) - 1); + const bool round = mant & (UINT64_C(1) << (4 + 24)); + const bool guard = mant & (UINT64_C(1) << (5 + 24)); + mant >>= (5 + 24); + uint32_t ret = (uint32_t)mant; + // -896 == -0x380 + uint16_t new_expon = biased + (uint16_t)(127 - 1023); + ret |= (uint32_t)new_expon << 23; + // we round afterwards incase we round up to the next exponent + if (round && (guard || sticky)) { + // round up to even + ret++; + } + return ret; +} + +float dtof(long double x_flt) { + Float64_Bitwise x; + Float32_Bitwise y; + #if 1 + x.flt_part = fabsl(x_flt); + y.u32_part = _dtof_c(x.u64_part); + if (signbit(x_flt)) { + y.u32_part ^= (UINT32_C(1) << 31); + } + #else + x.flt_part = x_flt; + y.u32_part = _dtof_c(x.u64_part); + #endif + return y.flt_part; +} + +static uint64_t rand_u64(void) { + union { + uint64_t u64; + uint16_t u16[4]; + } split; + #if 1 + split.u16[0] = (uint16_t)rand(); + split.u16[1] = (uint16_t)rand(); + split.u16[2] = (uint16_t)rand(); + #else + split.u16[0] = (uint16_t)rand() & 0x0001; + split.u16[1] = (uint16_t)rand() & 0xFF00; + split.u16[2] = (uint16_t)rand() & 0x8000; + #endif + #if 0 + split.u16[3] = (uint16_t)rand() | 0x3E60; + #elif 0 + split.u16[3] = ((unsigned)rand() % (0x4820 - 0x3660)) + 0x3660; + #else + split.u16[3] = ((unsigned)rand() % (0x3840 - 0x3660)) + 0x3660; + #endif + + return split.u64; +} + +void brute_force_test(void) { + fputs("\n", stdout); + srand(0x7184CE); + for (size_t i = 0; i < 65536; i++) { + F64_pun x; + F32_pun t; + F32_pun g; + x.bin = rand_u64(); + g.flt = (float)x.flt; + t.flt = dtof(x.flt); + if (g.bin != t.bin) { + print_failed(x.flt, g.bin, t.bin); + if (!(isnan(g.flt) && isnan(t.flt))) { + return; + } + } + } + fputs("All Good\n", stdout); +} + +#else + +#define print_failed(...) + +#define brute_force_test() + +#endif + size_t run_test(void) { typedef long double input_t; typedef F32_pun output_t; @@ -31,6 +190,7 @@ size_t run_test(void) { result.flt = (float)input[i]; if (result.bin != output[i].bin) { if (!(isnan(result.flt) && isnan(output[i].flt))) { + print_failed(input[i], result.bin, output[i].bin); return i; } } @@ -50,6 +210,7 @@ int main(void) { boot_sprintf(buf, "Failed test: %u\n", fail_index); fputs(buf, stdout); } + brute_force_test(); while (!os_GetCSC());