Skip to content

Commit ca86e17

Browse files
committed
ldexpf now correctly handles/rounds subnormal inputs
1 parent a9687a1 commit ca86e17

File tree

3 files changed

+206
-179
lines changed

3 files changed

+206
-179
lines changed

src/libc/ldexpf.src

Lines changed: 145 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -16,202 +16,187 @@ _scalbn := _ldexpf
1616

1717
else
1818

19-
if 1
19+
; (set to 0 or 1) avoid returning negative zero on underflow with Ti's floats
20+
__ldexpf_avoid_negative_zero := 1
2021

21-
; NOTE: since Ti floats are used, negative zero will not be returned unless the
22-
; input was negative zero.
23-
;
24-
; normal inputs are handled correctly, unless the output is subnormal
25-
; subnormal inputs/outputs return zero and set ERANGE and FE_INEXACT
26-
; zero/infinite/NaN inputs are handled correctly
27-
_ldexpf:
28-
_ldexp:
29-
_scalbnf:
22+
; ldexpf behaviour:
23+
; - signed zero, infinity, and NaN inputs are returned unmodified
24+
; - ERRNO and FE_INEXACT are set if a finite value becomes zero or infinite
25+
; - FE_INEXACT is set if rounding occured
26+
;-------------------------------------------------------------------------------
27+
28+
private __ldexpf_helper
29+
__ldexpf_helper:
30+
.maybe_subnormal:
31+
or a, a
32+
adc hl, bc ; BC is zero
33+
.ret_self:
34+
ld hl, (iy + 3) ; mant
35+
ret z ; return zero/inf/NaN
36+
dec bc ; BC is now -1
37+
; .subnormal_input:
38+
; BC is -1 here
39+
bit 7, (iy + 11) ; scale sign
40+
jr nz, .move_subnormal_down
41+
; .move_subnormal_up:
42+
ld a, e ; signbit
43+
ld de, (iy + 9) ; scale
44+
.norm_loop:
45+
add hl, hl
46+
jr c, .normalized
47+
ex de, hl
48+
add hl, bc ; --scale
49+
ex de, hl
50+
jr c, .norm_loop
51+
; .still_subnormal:
52+
; DE is -1 here
53+
inc e ; ld e, 0
54+
jr _ldexpf.finish_subnormal
55+
.normalized:
56+
inc de
57+
ex de, hl
58+
jr _ldexpf.scale_up
59+
60+
.move_subnormal_down:
61+
; BC is -1 here
62+
; first we need to test that the result won't be zero
63+
call __ictlz
64+
; A is [1, 23]
65+
; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
66+
sub a, 24 ; A is [-23, -1]
67+
ld c, a ; sign extend A
68+
ld hl, (iy + 9) ; scale
69+
ld a, l
70+
or a, a
71+
sbc hl, bc
72+
cpl
73+
jr nc, _ldexpf.shru_common
74+
.underflow_to_zero:
75+
xor a, a
76+
ld b, a ; ld b, 0
77+
if __ldexpf_avoid_negative_zero
78+
res 7, (iy + 6)
79+
end if
80+
.overflow_to_inf: ; <-- Carry is set when inf/NaN
81+
ld hl, 5 ; ERANGE
82+
ld (_errno), hl
83+
ld l, h ; ld l, 0
84+
ex de, hl
85+
jr nc, _ldexpf.underflow_hijack
86+
ld de, $800000
87+
ld b, $7F
88+
jr _ldexpf.overflow_hijack
89+
90+
;-------------------------------------------------------------------------------
91+
; When the input and output are normal:
92+
; scaling up : 60F + 12R + 4W + 2
93+
; scaling down: 60F + 12R + 4W + 4
3094
_scalbn:
95+
_scalbnf:
96+
_ldexp:
97+
_ldexpf:
3198
ld iy, 0
3299
lea bc, iy + 0
33100
add iy, sp
34101
ld hl, (iy + 3) ; mant
35102
add hl, hl
36103
ld a, (iy + 6) ; expon
104+
ld e, a ; signbit
37105
adc a, a
38-
jr z, .maybe_subnormal
106+
jr z, __ldexpf_helper.maybe_subnormal
107+
ld c, a
39108
inc a
40-
jr z, .ret_self ; inf NaN
41-
dec a
109+
jr z, __ldexpf_helper.ret_self ; inf NaN
110+
ld a, e ; signbit
42111
ex de, hl
43112
ld hl, (iy + 9) ; scale
44-
ld c, a
45113
add hl, bc ; add expon
46-
ld a, l
47114
bit 7, (iy + 11) ; scale sign
48-
jr z, .scale_up
49-
.scale_down:
50-
; HL is not INT_MIN here
51-
dec hl
52-
add hl, hl
53-
jr nc, .finish ; expon > 0
54-
; expon <= 0 or subnormal
55-
.underflow_to_zero:
56-
ld hl, ___fe_cur_env
57-
set 5, (hl) ; FE_INEXACT
58-
if 0
59-
ld a, (iy + 6)
60-
and a, $80 ; copysign
61-
else
62-
xor a, a ; avoid returning negative zero with Ti's floats
63-
end if
64-
sbc hl, hl
65-
.common_erange:
66-
ld bc, 5 ; ERANGE
67-
ld (_errno), bc
68-
ld e, a
69-
ret
70-
.overflow_to_inf:
71-
ld hl, $800000
72-
ld a, (iy + 6)
73-
or a, $7F ; copysign
74-
jr .common_erange
75-
115+
jr nz, .scale_down
76116
.scale_up:
77-
ld bc, -255
117+
ld bc, -255 ; $FFFF01
78118
add hl, bc
79-
jr c, .overflow_to_inf
80-
.finish:
81-
ld l, a
119+
jr c, __ldexpf_helper.overflow_to_inf
120+
; sbc hl, bc ; restore hl
121+
dec l ; we only care about the low 8 bits
82122
ex de, hl
83-
; signbit(A) E:UHL >>= 1
84-
ld a, (iy + 6) ; expon
123+
.finish_subnormal:
85124
push hl
86-
rla
125+
.finish:
126+
rla ; extract signbit
87127
rr e
88128
rr (iy - 1)
89129
pop hl
90130
rr h
91131
rr l
92132
ret
93133

94-
.maybe_subnormal:
95-
dec bc ; BC is now -1
96-
add hl, bc
97-
jr c, .underflow_to_zero
98-
; return zero
99-
.ret_self:
100-
ld hl, (iy + 3)
101-
ld e, (iy + 6)
102-
ret
103-
104-
else
105-
106-
; normal inputs are handled correctly, unless the output is subnormal
107-
; subnormal inputs are handled correctly for positive scaling values
108-
; subnormal outputs return zero and set ERANGE and FE_INEXACT for negative scaling values
109-
; zero/infinite/NaN inputs are handled correctly
110-
_ldexpf:
111-
_ldexp:
112-
_scalbnf:
113-
_scalbn:
114-
ld iy, 0
115-
lea bc, iy + 0
116-
add iy, sp
117-
ld hl, (iy + 3) ; mant
118-
add hl, hl
119-
ld e, (iy + 6) ; expon
120-
ld a, e
121-
rl e
122-
jr z, .maybe_subnormal
123-
inc e
124-
jr z, .ret_self ; inf NaN
125-
dec e
126-
ld c, e
127-
ex de, hl
128-
ld hl, (iy + 9) ; scale
129-
add hl, bc ; add expon
130-
ld a, l
131-
bit 7, (iy + 11) ; scale sign
132-
jr z, .scale_up
134+
;-------------------------------------------------------------------------------
133135
.scale_down:
134-
; test signbit
135-
push hl
136+
push de ; mant <<= 1
137+
ld e, l ; shift amount
136138
; HL is not INT_MIN here
137139
dec hl
138140
add hl, hl
139-
pop hl
140141
jr nc, .finish ; expon > 0
141-
; expon <= 0 or subnormal
142-
; jr .underflow_to_zero
143-
.underflow_to_zero:
142+
;-------------------------------------------------------------------------------
143+
.shru_to_subnormal:
144+
xor a, a
145+
sub a, e
146+
pop de
147+
ld c, 48 ; ld bc, 24 << 1
148+
add hl, bc
149+
jr nc, __ldexpf_helper.underflow_to_zero
150+
151+
set 7, (iy + 5) ; set implicit mantissa bit
152+
.shru_common:
153+
; A should be [0, 23]
154+
ld b, a
155+
ld hl, (iy + 3) ; mantissa
156+
push hl ; ld (iy - 3), hl
157+
xor a, a
158+
inc b
159+
; shift amount will be [1, 24]
160+
ld c, a ; ld c, 0
161+
ld d, (iy - 1)
162+
.shru_loop:
163+
adc a, c ; collect sticky bits
164+
srl d
165+
rr h
166+
rr l
167+
djnz .shru_loop
168+
ld (iy - 1), d
169+
pop de
170+
ld d, h
171+
ld e, l
172+
173+
; round upwards to even if (round && (guard || sticky))
174+
jr nc, .no_round
175+
; be careful not to touch the carry flag
176+
inc a
177+
dec a
178+
jr nz, .round_up
179+
bit 0, e ; test guard bit
180+
jr z, .no_round
181+
.round_up:
182+
inc de ; round upwards to even (wont overflow)
183+
.no_round:
184+
adc a, a
185+
jr z, .result_is_exact
186+
.underflow_hijack:
187+
.overflow_hijack:
144188
ld hl, ___fe_cur_env
145189
set 5, (hl) ; FE_INEXACT
146-
ld a, (iy + 6)
190+
.result_is_exact:
191+
ld a, (iy + 6) ; get signbit
192+
ex de, hl
147193
and a, $80 ; copysign
148-
sbc hl, hl
149-
.common_erange:
150-
ld bc, 5 ; ERANGE
151-
ld (_errno), bc
194+
or a, b ; used for the overflow to infinite path
152195
ld e, a
153196
ret
154-
.overflow_to_inf:
155-
ld hl, $800000
156-
ld a, (iy + 6)
157-
or a, $7F ; copysign
158-
jr .common_erange
159197

160-
.scale_up:
161-
ld bc, -255
162-
add hl, bc
163-
jr c, .overflow_to_inf
164-
.finish:
165-
ld l, a
166-
ex de, hl
167-
.finish_subnormal:
168-
; signbit(A) E:UHL >>= 1
169-
ld a, (iy + 6) ; expon
170-
push hl
171-
rla
172-
rr e
173-
rr (iy - 1)
174-
pop hl
175-
rr h
176-
rr l
177-
ret
178-
179-
.maybe_subnormal:
180-
dec bc ; BC is now -1
181-
add hl, bc
182-
; jr c, .underflow_to_zero
183-
jr c, .subnormal
184-
; return zero
185-
.ret_self:
186-
ld hl, (iy + 3)
187-
ld e, (iy + 6)
188-
ret
189-
190-
.subnormal:
191-
; BC is -1 here
192-
bit 7, (iy + 11) ; scale sign
193-
jr nz, .underflow_to_zero
194-
ld de, (iy + 9) ; scale
195-
ld hl, (iy + 3) ; mant
196-
.norm_loop:
197-
add hl, hl
198-
jr c, .normalized
199-
ex de, hl
200-
add hl, bc ; --scale
201-
ex de, hl
202-
jr c, .norm_loop
203-
; .still_subnormal:
204-
ld e, 0
205-
jr .finish_subnormal
206-
.normalized:
207-
inc de
208-
ex de, hl
209-
ld a, l
210-
jr .scale_up
211-
212-
end if
213-
214-
extern ___fe_cur_env
215198
extern _errno
199+
extern ___fe_cur_env
200+
extern __ictlz
216201

217202
end if

0 commit comments

Comments
 (0)