@@ -16,202 +16,187 @@ _scalbn := _ldexpf
16
16
17
17
else
18
18
19
- if 1
19
+ ; (set to 0 or 1) avoid returning negative zero on underflow with Ti's floats
20
+ __ldexpf_avoid_negative_zero := 1
20
21
21
- ; NOTE: since Ti floats are used, negative zero will not be returned unless the
22
- ; input was negative zero.
23
- ;
24
- ; normal inputs are handled correctly, unless the output is subnormal
25
- ; subnormal inputs/outputs return zero and set ERANGE and FE_INEXACT
26
- ; zero/infinite/NaN inputs are handled correctly
27
- _ldexpf:
28
- _ldexp:
29
- _scalbnf:
22
+ ; ldexpf behaviour:
23
+ ; - signed zero, infinity, and NaN inputs are returned unmodified
24
+ ; - ERRNO and FE_INEXACT are set if a finite value becomes zero or infinite
25
+ ; - FE_INEXACT is set if rounding occured
26
+ ;-------------------------------------------------------------------------------
27
+
28
+ private __ldexpf_helper
29
+ __ldexpf_helper:
30
+ .maybe_subnormal:
31
+ or a, a
32
+ adc hl, bc ; BC is zero
33
+ .ret_self:
34
+ ld hl, (iy + 3) ; mant
35
+ ret z ; return zero/inf/NaN
36
+ dec bc ; BC is now -1
37
+ ; .subnormal_input:
38
+ ; BC is -1 here
39
+ bit 7, (iy + 11) ; scale sign
40
+ jr nz, .move_subnormal_down
41
+ ; .move_subnormal_up:
42
+ ld a, e ; signbit
43
+ ld de, (iy + 9) ; scale
44
+ .norm_loop:
45
+ add hl, hl
46
+ jr c, .normalized
47
+ ex de, hl
48
+ add hl, bc ; --scale
49
+ ex de, hl
50
+ jr c, .norm_loop
51
+ ; .still_subnormal:
52
+ ; DE is -1 here
53
+ inc e ; ld e, 0
54
+ jr _ldexpf.finish_subnormal
55
+ .normalized:
56
+ inc de
57
+ ex de, hl
58
+ jr _ldexpf.scale_up
59
+
60
+ .move_subnormal_down:
61
+ ; BC is -1 here
62
+ ; first we need to test that the result won't be zero
63
+ call __ictlz
64
+ ; A is [1, 23]
65
+ ; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
66
+ sub a, 24 ; A is [-23, -1]
67
+ ld c, a ; sign extend A
68
+ ld hl, (iy + 9) ; scale
69
+ ld a, l
70
+ or a, a
71
+ sbc hl, bc
72
+ cpl
73
+ jr nc, _ldexpf.shru_common
74
+ .underflow_to_zero:
75
+ xor a, a
76
+ ld b, a ; ld b, 0
77
+ if __ldexpf_avoid_negative_zero
78
+ res 7, (iy + 6)
79
+ end if
80
+ .overflow_to_inf: ; <-- Carry is set when inf/NaN
81
+ ld hl, 5 ; ERANGE
82
+ ld (_errno), hl
83
+ ld l, h ; ld l, 0
84
+ ex de, hl
85
+ jr nc, _ldexpf.underflow_hijack
86
+ ld de, $800000
87
+ ld b, $7F
88
+ jr _ldexpf.overflow_hijack
89
+
90
+ ;-------------------------------------------------------------------------------
91
+ ; When the input and output are normal:
92
+ ; scaling up : 60F + 12R + 4W + 2
93
+ ; scaling down: 60F + 12R + 4W + 4
30
94
_scalbn:
95
+ _scalbnf:
96
+ _ldexp:
97
+ _ldexpf:
31
98
ld iy, 0
32
99
lea bc, iy + 0
33
100
add iy, sp
34
101
ld hl, (iy + 3) ; mant
35
102
add hl, hl
36
103
ld a, (iy + 6) ; expon
104
+ ld e, a ; signbit
37
105
adc a, a
38
- jr z, .maybe_subnormal
106
+ jr z, __ldexpf_helper.maybe_subnormal
107
+ ld c, a
39
108
inc a
40
- jr z, .ret_self ; inf NaN
41
- dec a
109
+ jr z, __ldexpf_helper .ret_self ; inf NaN
110
+ ld a, e ; signbit
42
111
ex de, hl
43
112
ld hl, (iy + 9) ; scale
44
- ld c, a
45
113
add hl, bc ; add expon
46
- ld a, l
47
114
bit 7, (iy + 11) ; scale sign
48
- jr z, .scale_up
49
- .scale_down:
50
- ; HL is not INT_MIN here
51
- dec hl
52
- add hl, hl
53
- jr nc, .finish ; expon > 0
54
- ; expon <= 0 or subnormal
55
- .underflow_to_zero:
56
- ld hl, ___fe_cur_env
57
- set 5, (hl) ; FE_INEXACT
58
- if 0
59
- ld a, (iy + 6)
60
- and a, $80 ; copysign
61
- else
62
- xor a, a ; avoid returning negative zero with Ti's floats
63
- end if
64
- sbc hl, hl
65
- .common_erange:
66
- ld bc, 5 ; ERANGE
67
- ld (_errno), bc
68
- ld e, a
69
- ret
70
- .overflow_to_inf:
71
- ld hl, $800000
72
- ld a, (iy + 6)
73
- or a, $7F ; copysign
74
- jr .common_erange
75
-
115
+ jr nz, .scale_down
76
116
.scale_up:
77
- ld bc, -255
117
+ ld bc, -255 ; $FFFF01
78
118
add hl, bc
79
- jr c, .overflow_to_inf
80
- .finish:
81
- ld l, a
119
+ jr c, __ldexpf_helper .overflow_to_inf
120
+ ; sbc hl, bc ; restore hl
121
+ dec l ; we only care about the low 8 bits
82
122
ex de, hl
83
- ; signbit(A) E:UHL >>= 1
84
- ld a, (iy + 6) ; expon
123
+ .finish_subnormal:
85
124
push hl
86
- rla
125
+ .finish:
126
+ rla ; extract signbit
87
127
rr e
88
128
rr (iy - 1)
89
129
pop hl
90
130
rr h
91
131
rr l
92
132
ret
93
133
94
- .maybe_subnormal:
95
- dec bc ; BC is now -1
96
- add hl, bc
97
- jr c, .underflow_to_zero
98
- ; return zero
99
- .ret_self:
100
- ld hl, (iy + 3)
101
- ld e, (iy + 6)
102
- ret
103
-
104
- else
105
-
106
- ; normal inputs are handled correctly, unless the output is subnormal
107
- ; subnormal inputs are handled correctly for positive scaling values
108
- ; subnormal outputs return zero and set ERANGE and FE_INEXACT for negative scaling values
109
- ; zero/infinite/NaN inputs are handled correctly
110
- _ldexpf:
111
- _ldexp:
112
- _scalbnf:
113
- _scalbn:
114
- ld iy, 0
115
- lea bc, iy + 0
116
- add iy, sp
117
- ld hl, (iy + 3) ; mant
118
- add hl, hl
119
- ld e, (iy + 6) ; expon
120
- ld a, e
121
- rl e
122
- jr z, .maybe_subnormal
123
- inc e
124
- jr z, .ret_self ; inf NaN
125
- dec e
126
- ld c, e
127
- ex de, hl
128
- ld hl, (iy + 9) ; scale
129
- add hl, bc ; add expon
130
- ld a, l
131
- bit 7, (iy + 11) ; scale sign
132
- jr z, .scale_up
134
+ ;-------------------------------------------------------------------------------
133
135
.scale_down:
134
- ; test signbit
135
- push hl
136
+ push de ; mant <<= 1
137
+ ld e, l ; shift amount
136
138
; HL is not INT_MIN here
137
139
dec hl
138
140
add hl, hl
139
- pop hl
140
141
jr nc, .finish ; expon > 0
141
- ; expon <= 0 or subnormal
142
- ; jr .underflow_to_zero
143
- .underflow_to_zero:
142
+ ;-------------------------------------------------------------------------------
143
+ .shru_to_subnormal:
144
+ xor a, a
145
+ sub a, e
146
+ pop de
147
+ ld c, 48 ; ld bc, 24 << 1
148
+ add hl, bc
149
+ jr nc, __ldexpf_helper.underflow_to_zero
150
+
151
+ set 7, (iy + 5) ; set implicit mantissa bit
152
+ .shru_common:
153
+ ; A should be [0, 23]
154
+ ld b, a
155
+ ld hl, (iy + 3) ; mantissa
156
+ push hl ; ld (iy - 3), hl
157
+ xor a, a
158
+ inc b
159
+ ; shift amount will be [1, 24]
160
+ ld c, a ; ld c, 0
161
+ ld d, (iy - 1)
162
+ .shru_loop:
163
+ adc a, c ; collect sticky bits
164
+ srl d
165
+ rr h
166
+ rr l
167
+ djnz .shru_loop
168
+ ld (iy - 1), d
169
+ pop de
170
+ ld d, h
171
+ ld e, l
172
+
173
+ ; round upwards to even if (round && (guard || sticky))
174
+ jr nc, .no_round
175
+ ; be careful not to touch the carry flag
176
+ inc a
177
+ dec a
178
+ jr nz, .round_up
179
+ bit 0, e ; test guard bit
180
+ jr z, .no_round
181
+ .round_up:
182
+ inc de ; round upwards to even (wont overflow)
183
+ .no_round:
184
+ adc a, a
185
+ jr z, .result_is_exact
186
+ .underflow_hijack:
187
+ .overflow_hijack:
144
188
ld hl, ___fe_cur_env
145
189
set 5, (hl) ; FE_INEXACT
146
- ld a, (iy + 6)
190
+ .result_is_exact:
191
+ ld a, (iy + 6) ; get signbit
192
+ ex de, hl
147
193
and a, $80 ; copysign
148
- sbc hl, hl
149
- .common_erange:
150
- ld bc, 5 ; ERANGE
151
- ld (_errno), bc
194
+ or a, b ; used for the overflow to infinite path
152
195
ld e, a
153
196
ret
154
- .overflow_to_inf:
155
- ld hl, $800000
156
- ld a, (iy + 6)
157
- or a, $7F ; copysign
158
- jr .common_erange
159
197
160
- .scale_up:
161
- ld bc, -255
162
- add hl, bc
163
- jr c, .overflow_to_inf
164
- .finish:
165
- ld l, a
166
- ex de, hl
167
- .finish_subnormal:
168
- ; signbit(A) E:UHL >>= 1
169
- ld a, (iy + 6) ; expon
170
- push hl
171
- rla
172
- rr e
173
- rr (iy - 1)
174
- pop hl
175
- rr h
176
- rr l
177
- ret
178
-
179
- .maybe_subnormal:
180
- dec bc ; BC is now -1
181
- add hl, bc
182
- ; jr c, .underflow_to_zero
183
- jr c, .subnormal
184
- ; return zero
185
- .ret_self:
186
- ld hl, (iy + 3)
187
- ld e, (iy + 6)
188
- ret
189
-
190
- .subnormal:
191
- ; BC is -1 here
192
- bit 7, (iy + 11) ; scale sign
193
- jr nz, .underflow_to_zero
194
- ld de, (iy + 9) ; scale
195
- ld hl, (iy + 3) ; mant
196
- .norm_loop:
197
- add hl, hl
198
- jr c, .normalized
199
- ex de, hl
200
- add hl, bc ; --scale
201
- ex de, hl
202
- jr c, .norm_loop
203
- ; .still_subnormal:
204
- ld e, 0
205
- jr .finish_subnormal
206
- .normalized:
207
- inc de
208
- ex de, hl
209
- ld a, l
210
- jr .scale_up
211
-
212
- end if
213
-
214
- extern ___fe_cur_env
215
198
extern _errno
199
+ extern ___fe_cur_env
200
+ extern __ictlz
216
201
217
202
end if
0 commit comments