1
1
assume adl=1
2
2
3
+ ;-------------------------------------------------------------------------------
4
+
5
+ section .text
6
+
7
+ public __ulltod
8
+ ; (long double)unsigned long long
9
+ __ulltod:
10
+ cp a, a ; set Z flag (unsigned input: the sign bit must stay clear)
11
+ push af ; popped at __int_to_f64_shl.finish, which returns without setting bit 7 of B when Z is set
12
+ jq __lltod_common
13
+
14
+ ;-------------------------------------------------------------------------------
15
+
16
+ section .text
17
+
18
+ public __lltod
19
+ ; (long double)long long
20
+ __lltod:
21
+ bit 7, b ; Z = sign bit of BC:UDE:UHL is clear (non-negative input)
22
+ push af ; keep that sign test; it is popped at __int_to_f64_shl.finish
23
+ call nz, __llneg ; abs(BC:UDE:UHL)
24
+
25
+ require __lltod_common ; no jump or ret here: execution continues in __lltod_common (presumably placed directly after this section — confirm)
26
+
27
+ ;-------------------------------------------------------------------------------
28
+
29
+ section .text
30
+
31
+ private __lltod_common
32
+ __lltod_common: ; entered with the caller's AF on the stack; .finish pops it and sets the sign bit unless Z
33
+ call __llctlz
34
+ sub a, 63 ; normalize clz_result
35
+ ; filter out exponent of $000 (zero) and $3FF (one)
36
+ jr nc, __int_to_f64_zero_or_one
37
+ ; A is [-63, -1]
38
+ add a, 52
39
+ ; A is [-11, 51]
40
+ jr c, __int_to_f64_shl ; carry: A is [0, 51], the value only needs a left shift
41
+ ; __int_to_f64_shr:
42
+ ; exponent = (1023 or $3FF or f64_bias) + base2_logarithm
43
+ ; Minimum exponent: $434 (2^53)
44
+ ; Maximum exponent: $43E (2^63)
45
+ ; It is assumed that A is [-11, -1] here, or [-63, -53] before adding 52
46
+ cpl ; negate A; CPL and INC leave carry clear for the first .shift_loop test
47
+ inc a
48
+ ; A is [1, 11]
49
+ push hl
50
+ push bc
51
+ ld b, a ; B = shift count for DJNZ
52
+ ld c, 1 ; C = 1 + number of 1 bits dropped before the final one
53
+ .shift_loop:
54
+ jr nc, .no_carry ; carry = bit dropped by the previous pass (clear on the first pass)
55
+ inc c
56
+ .no_carry:
57
+ srl h ; only H:L is shifted here: shift <= 11, so every dropped bit lies in the low 16 bits
58
+ rr l ; carry = bit just dropped
59
+ djnz .shift_loop
60
+ ; test round bit (the last bit dropped, still in carry)
61
+ jr nc, .no_round
62
+ ; test sticky bits (any earlier dropped bit means more than half: round up)
63
+ dec c
64
+ jr nz, .round_up
65
+ ; exact tie: test guard bit (LSB of the shifted value) to round to even
66
+ bit 0, l
67
+ jr z, .no_round ; already even; BIT reports in Z and never changes carry
68
+ .round_up:
69
+ inc b ; round up after shifting (B is 0 after DJNZ, so B = 1)
70
+ .no_round:
71
+ ld h, b ; H = 1 to round up, 0 otherwise
72
+ pop bc
73
+
74
+ ld l, a ; L = shift
75
+ ex (sp), hl ; (SP) = shift
76
+ call __llshru
77
+ ex (sp), hl ; (SP) = shifted HL, H = rounding, L = shift
78
+ add a, 51 ; A = shift + 51 = base2_logarithm - 1 (assumes __llshru preserves A - TODO confirm)
79
+
80
+ dec h ; Z set only when rounding up (H was 1)
81
+ push af ; the OR below overwrites Z
82
+ ; exponent = ($400 + (base2_logarithm - 1)) << 4
83
+ ; BC = $4EEM
84
+ ld l, a
85
+ ld h, $04
86
+ ; clear the implicit mantissa bit
87
+ res 4, c ; 52 % 8 == 4
88
+ add hl, hl
89
+ add hl, hl
90
+ add hl, hl
91
+ add hl, hl
92
+ ld a, l
93
+ or a, c
94
+ ld c, a
95
+ ld b, h
96
+ pop af
97
+ pop hl ; restore shifted HL
98
+ call z, __lladd_1 ; round up to even
99
+ jr __int_to_f64_shl.finish
100
+
101
+ ;-------------------------------------------------------------------------------
102
+
103
+ section .text
104
+
105
+ private __int_to_f64_zero_or_one
106
+ __int_to_f64_zero_or_one:
107
+ ; carry is cleared here
108
+ ; UHL is either one or zero
109
+ ld b, h ; H is 0 for both inputs, so B = C = 0
110
+ ld c, h
111
+ jr nz, .ret_zero ; flags are still those of the caller's SUB: Z means the input is one
112
+ ld bc, $3FF0 ; exponent $3FF with a zero mantissa nibble
113
+ dec hl ; ld hl, 0
114
+ .ret_zero:
115
+ ex de, hl ; UDE = 0
116
+ sbc hl, hl ; UHL = 0 because carry is clear
117
+ jr __int_to_f64_shl.finish
118
+
119
+ ;-------------------------------------------------------------------------------
120
+
3
121
section .text
4
122
5
123
public __itod
6
124
; (long double)int
7
125
__itod:
8
126
push hl
9
127
add hl, hl ; extract signbit
128
+ sbc hl, hl ; UHL = 0 or -1 from the sign bit; Z set when the input is non-negative
129
+ ld e, l ; sign extend UHL to E:UHL
10
130
pop hl
11
- push af
12
- ld e, 0
13
- call c, __ineg ; abs(UHL)
14
- jr __ltod.hijack
131
+ jq __ltod ; __ltod re-tests the sign with BIT 7, E before pushing AF
132
+
133
+ ;-------------------------------------------------------------------------------
15
134
16
135
section .text
17
136
@@ -22,102 +141,104 @@ __utod:
22
141
23
142
require __ultod
24
143
144
+ ;-------------------------------------------------------------------------------
145
+
25
146
section .text
26
147
27
148
public __ultod
28
149
; (long double)unsigned long
29
150
__ultod:
30
- or a, a
31
- push af
32
- jr __ltod.hijack
151
+ cp a, a ; set Z flag (unsigned input: the sign bit must stay clear)
152
+ push af ; popped at __int_to_f64_shl.finish, which returns without setting bit 7 of B when Z is set
153
+ jq __ltod_common
154
+
155
+ ;-------------------------------------------------------------------------------
33
156
34
157
section .text
35
158
36
159
public __ltod
37
160
; (long double)long
38
161
__ltod:
39
- rlc e
162
+ bit 7, e ; Z = sign bit of E:UHL is clear (non-negative input)
163
+
164
+ require __ltod.hijack_itod
165
+
166
+ ;-------------------------------------------------------------------------------
167
+
168
+ section .text
169
+
170
+ private __ltod.hijack_itod
171
+ __ltod.hijack_itod:
172
+
40
173
push af
41
- rrc e
42
- call c, __lneg ; abs(E:UHL)
174
+ call nz, __lneg ; abs(E:UHL)
43
175
44
- require __ltod.hijack
176
+ require __ltod_common
177
+
178
+ ;-------------------------------------------------------------------------------
45
179
46
180
section .text
47
181
48
- private __ltod.hijack
49
- __ltod.hijack :
182
+ private __ltod_common
183
+ __ltod_common :
50
184
call __lctlz
51
- inc.s bc ; clear UBC
52
- ld b, a ; <<= 8
53
- xor a, $20 ; turns 32 into zero and clears carry flag
54
- jr z, .zero
55
- ; clears the MSB since the float will be normalized
56
- ; x <<= clz_result + 1; /* shift by 32 is UB */
57
- if 0
58
- ; calculate the exponent
59
- push hl
60
- ; 1023 + 31 = 1054 = 0x41E
61
- ld hl, $041E00
62
- ld c, l ; ld c, 0
63
- sbc hl, bc
64
- ld l, e ; (expon16 << (16 + 24)) | (mant48)
65
- ex de, hl
66
- pop hl
185
+ sub a, 31 ; normalize clz_result
67
186
68
- ; ld b, a
69
- inc b
70
- ld a, e
71
- .loop32: ; shift by 32 is not UB here!
72
- add hl, hl
73
- rla
74
- djnz .loop32
75
- ld e, a
76
- else
77
- ; calculate the exponent
78
- push hl
79
- ; 1023 + 31 = 1054 = 0x41E
80
- ld hl, $041E00
81
- ld c, l ; ld c, 0
82
- sbc hl, bc
83
- ld l, e ; (expon16 << (16 + 24)) | (mant48)
84
- ex de, hl
187
+ ; filter out exponent of $000 (zero) and $3FF (one)
188
+ jr nc, __int_to_f64_zero_or_one
189
+ ; A is [-31, -1]
190
+ add a, 52
191
+ ; A is [21, 51]
85
192
86
- ld l, b
87
- pop bc
88
- ld a, e
89
- call __lshl
90
- push bc
91
- pop hl
92
- ; shift by 32 is UB
93
- add hl, hl
94
- rla
95
- ld e, a
96
- end if
193
+ require __int_to_f64_shl ; NOTE(review): the code that follows shifts all of BC:UDE:UHL, but only E:UHL is set up on this path - confirm D, the upper byte of UDE and BC are zero (or harmless) here
97
194
98
- ; UDE:D has expon, E:UHL has mant
99
- ; Float64_mant_bits - uint48_bits = 4
100
- ld c, 16 + 4
101
- push bc
195
+ ;-------------------------------------------------------------------------------
196
+
197
+ section .text
198
+
199
+ private __int_to_f64_shl
200
+ __int_to_f64_shl:
201
+ ; exponent = (1023 or $3FF or f64_bias) + base2_logarithm
202
+ ; Minimum exponent: $400 (2^1)
203
+ ; Maximum exponent: $433 (2^52)
204
+ ; It is assumed that A is [0, 51] here, or [-52, -1] before adding 52
205
+ push hl
206
+ ld l, a ; L = shift
207
+ ex (sp), hl ; (SP) = shift
102
208
call __llshl
103
- pop af ; reset SP
209
+ ex (sp), hl ; (SP) = shifted HL, L = shift
210
+
211
+ ld a, 51
212
+ sub a, l ; A = 51 - shift = base2_logarithm - 1, [0, 51]
213
+
214
+ ; exponent = ($400 + (base2_logarithm - 1)) << 4
215
+ ; BC = $4EEM
216
+ ld l, a
217
+ ld h, $04
218
+ ; clear the implicit mantissa bit
219
+ res 4, c ; 52 % 8 == 4
220
+ add hl, hl
221
+ add hl, hl
222
+ add hl, hl
223
+ add hl, hl
224
+ ld a, l
225
+ or a, c
226
+ ld c, a
227
+ ld b, h
228
+ pop hl ; restore shifted HL
104
229
.finish:
105
230
pop af
106
- ret nc ; positive
231
+ ret z ; Z in the saved flags: leave the sign bit clear
107
232
set 7, b
108
- ret ; negative
109
-
110
- .zero:
111
- ; E:UHL and A are zero
112
- ex de, hl
113
- sbc hl, hl
114
- ld b, e
115
- ld c, e
116
- pop af
117
233
ret
118
234
235
+ ;-------------------------------------------------------------------------------
236
+
119
237
extern __ineg
120
238
extern __lneg
121
239
extern __lctlz
122
- extern __lshl
240
+ extern __llctlz
123
241
extern __llshl
242
+ extern __llshru
243
+ extern __llneg
244
+ extern __lladd_1
0 commit comments