Skip to content

Commit 8529ceb

Browse files
committed
implemented lltod/ulltod in assembly and optimized ltod/ultod
1 parent a9687a1 commit 8529ceb

File tree

3 files changed

+194
-101
lines changed

3 files changed

+194
-101
lines changed

src/crt/lltod.src

Lines changed: 0 additions & 14 deletions
This file was deleted.

src/crt/ltod.src

Lines changed: 194 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,136 @@
11
assume adl=1
22

3+
;-------------------------------------------------------------------------------
4+
5+
section .text
6+
7+
public __ulltod
8+
; (long double)unsigned long long
9+
__ulltod:
; Unsigned 64-bit entry point: the value needs no negation, so only the sign
; marker is prepared before joining the shared conversion code.
10+
cp a, a ; set Z flag (A - A == 0); .finish returns on Z without setting the sign bit
11+
push af ; saved flags are popped again at __int_to_f64_shl.finish
12+
jq __lltod_common
13+
14+
;-------------------------------------------------------------------------------
15+
16+
section .text
17+
18+
public __lltod
19+
; (long double)long long
20+
__lltod:
; Signed 64-bit entry point: records the sign in the saved flags, replaces the
; value with its magnitude, then continues into __lltod_common.
21+
bit 7, b ; NZ when the top bit of B (sign of BC:UDE:UHL) is set
22+
push af ; saved flags are popped again at __int_to_f64_shl.finish
23+
call nz, __llneg ; abs(BC:UDE:UHL)
24+
25+
; NOTE(review): there is no jump here; presumably `require` makes the linker
; keep __lltod_common directly after this section so execution falls through
; -- confirm against the toolchain's linker behaviour.
require __lltod_common
26+
27+
;-------------------------------------------------------------------------------
28+
29+
section .text
30+
31+
private __lltod_common
32+
__lltod_common:
; Shared tail of __lltod/__ulltod.
; In:  BC:UDE:UHL = magnitude to convert; the caller has pushed AF, and Z in
;      those saved flags means "do not set the sign bit".
; Out: BC:UDE:UHL = float64 bit pattern (sign applied at __int_to_f64_shl.finish).
; Values whose highest set bit is above bit 52 are shifted right below and
; rounded to nearest, ties to even; everything else is handled by
; __int_to_f64_zero_or_one or __int_to_f64_shl.
33+
call __llctlz
34+
sub a, 63 ; normalize clz_result
35+
; filter out exponent of $000 (zero) and $3FF (one)
36+
jr nc, __int_to_f64_zero_or_one
37+
; A is [-63, -1]
38+
add a, 52
39+
; A is [-11, 51]
40+
jr c, __int_to_f64_shl
41+
; __int_to_f64_shr:
42+
; exponent = (1023 or $3FF or f64_bias) + base2_logarithm
43+
; Minimum exponent: $434 (2^53)
44+
; Maximum exponent: $43E (2^63)
45+
; It is assumed that A is [-11, -1] here, or [-63, -53] before adding 52
46+
cpl
47+
inc a
48+
; A is [1, 11]
49+
push hl
50+
push bc
51+
ld b, a ; B = number of bits that will be shifted out
52+
ld c, 1 ; C = 1 + number of set sticky bits seen so far
53+
.shift_loop:
; carry is clear on the first pass (the `jr c` above was not taken and nothing
; since has changed it); afterwards it holds the bit shifted out by `rr l`
54+
jr nc, .no_carry
55+
inc c
56+
.no_carry:
57+
srl h
58+
rr l
59+
djnz .shift_loop
60+
; test round bit
; carry = last bit shifted out; DJNZ does not modify flags
61+
jr nc, .no_round
62+
; test sticky bits
63+
dec c ; DEC r leaves carry untouched
64+
jr nz, .round_up
65+
; test guard bit
; exact tie: round up only when the lowest kept bit is odd (ties to even)
66+
bit 0, l
67+
jr z, .no_round ; BIT reports through Z and leaves carry (still set here) alone
68+
.round_up:
69+
inc b ; round up after shifting
70+
.no_round:
71+
ld h, b ; H = 1 if the result must be rounded up, else 0
72+
pop bc
73+
74+
ld l, a
75+
ex (sp), hl ; (SP) = shift
76+
call __llshru
77+
ex (sp), hl ; (SP) = shifted HL, H = rounding, L = shift
; NOTE(review): relies on A still holding the shift count after __llshru -- confirm
78+
add a, 51
79+
80+
dec h ; Z when rounding up is required
81+
push af
82+
; exponent = ($400 + (base2_logarithm - 1)) << 4
83+
; BC = $4EEM
84+
ld l, a
85+
ld h, $04
86+
; clear the implicit mantissa bit
87+
res 4, c ; 52 % 8 == 4
88+
add hl, hl
89+
add hl, hl
90+
add hl, hl
91+
add hl, hl
92+
ld a, l
93+
or a, c
94+
ld c, a
95+
ld b, h
96+
pop af
97+
pop hl ; restore shifted HL
98+
call z, __lladd_1 ; round up to even
99+
jr __int_to_f64_shl.finish
100+
101+
;-------------------------------------------------------------------------------
102+
103+
section .text
104+
105+
private __int_to_f64_zero_or_one
106+
__int_to_f64_zero_or_one:
; Reached by `jr nc` directly after `sub a, 63` / `sub a, 31`, so the flags are
; still those of that subtraction: Z means the input was one, NZ means zero.
; Produces BC:UDE:UHL = $3FF0:000000:000000 for one, or all zero for zero,
; then lets .finish apply the sign.
107+
; carry is cleared here
108+
; UHL is either one or zero
109+
ld b, h ; H is zero in both cases, so this zeroes B ...
110+
ld c, h ; ... and C
111+
jr nz, .ret_zero
112+
ld bc, $3FF0 ; exponent $3FF with an all-zero mantissa
113+
dec hl ; ld hl, 0
114+
.ret_zero:
115+
ex de, hl ; UDE = 0
116+
sbc hl, hl ; UHL = 0, since carry is clear
117+
jr __int_to_f64_shl.finish
118+
119+
;-------------------------------------------------------------------------------
120+
3121
section .text
4122

5123
public __itod
6124
; (long double)int
7125
__itod:
; Sign-extends the int in UHL into E and continues at __ltod, which performs
; the sign test itself. (Diff view: lines under an `N-` marker are removed.)
8126
push hl
9127
add hl, hl ; extract signbit
128+
sbc hl, hl ; UHL = 0 (Z) if non-negative, -1 (NZ) if negative
129+
ld e, l ; sign extend UHL to E:UHL
10130
pop hl
11-
push af
12-
ld e, 0
13-
call c, __ineg ; abs(UHL)
14-
jr __ltod.hijack
131+
jq __ltod
132+
133+
;-------------------------------------------------------------------------------
15134

16135
section .text
17136

@@ -22,102 +141,104 @@ __utod:
22141

23142
require __ultod
24143

144+
;-------------------------------------------------------------------------------
145+
25146
section .text
26147

27148
public __ultod
28149
; (long double)unsigned long
29150
__ultod:
; Unsigned 32-bit entry point: no negation is needed, so only the sign marker
; is prepared before joining __ltod_common.
; (Diff view: lines under an `N-` marker are removed by this commit.)
30-
or a, a
31-
push af
32-
jr __ltod.hijack
151+
cp a, a ; set Z flag (A - A == 0); .finish returns on Z without setting the sign bit
152+
push af ; saved flags are popped again at __int_to_f64_shl.finish
153+
jq __ltod_common
154+
155+
;-------------------------------------------------------------------------------
33156

34157
section .text
35158

36159
public __ltod
37160
; (long double)long
38161
__ltod:
; Signed 32-bit entry point: tests the sign of E:UHL and continues into
; __ltod.hijack_itod. (Diff view: lines under an `N-` marker are removed.)
39-
rlc e
162+
bit 7, e ; NZ when the top bit of E (sign of E:UHL) is set
163+
164+
; NOTE(review): no jump follows; presumably `require` keeps the named section
; directly after this one so execution falls through -- confirm.
require __ltod.hijack_itod
165+
166+
;-------------------------------------------------------------------------------
167+
168+
section .text
169+
170+
private __ltod.hijack_itod
171+
__ltod.hijack_itod:
; Expects Z/NZ from the preceding `bit 7, e`: saves it as the sign marker and
; negates E:UHL when it is negative.
; (Diff view: lines under an `N-` marker are removed by this commit.)
172+
40173
push af ; saved flags are popped again at __int_to_f64_shl.finish
41-
rrc e
42-
call c, __lneg ; abs(E:UHL)
174+
call nz, __lneg ; abs(E:UHL)
43175

44-
require __ltod.hijack
176+
require __ltod_common
177+
178+
;-------------------------------------------------------------------------------
45179

46180
section .text
47181

48-
private __ltod.hijack
49-
__ltod.hijack:
182+
private __ltod_common
183+
__ltod_common:
; Shared tail of the 32-bit conversions. After this commit the routine is only:
; call __lctlz / sub a, 31 / jr nc, __int_to_f64_zero_or_one / add a, 52,
; followed by a fall-through into __int_to_f64_shl with the shift count in A.
; (Diff view: lines under an `N-` marker are removed by this commit.)
; NOTE(review): __int_to_f64_shl shifts with __llshl, which the 64-bit path
; uses on BC:UDE:UHL, but nothing visible here clears D, UDE or BC for a
; 32-bit E:UHL input (the removed code had `inc.s bc ; clear UBC`). Confirm
; the upper registers are guaranteed zero, otherwise stray bits can end up in
; the exponent byte.
50184
call __lctlz
51-
inc.s bc ; clear UBC
52-
ld b, a ; <<= 8
53-
xor a, $20 ; turns 32 into zero and clears carry flag
54-
jr z, .zero
55-
; clears the MSB since the float will be normalized
56-
; x <<= clz_result + 1; /* shift by 32 is UB */
57-
if 0
58-
; calculate the exponent
59-
push hl
60-
; 1023 + 31 = 1054 = 0x41E
61-
ld hl, $041E00
62-
ld c, l ; ld c, 0
63-
sbc hl, bc
64-
ld l, e ; (expon16 << (16 + 24)) | (mant48)
65-
ex de, hl
66-
pop hl
185+
sub a, 31 ; normalize clz_result
67186

68-
; ld b, a
69-
inc b
70-
ld a, e
71-
.loop32: ; shift by 32 is not UB here!
72-
add hl, hl
73-
rla
74-
djnz .loop32
75-
ld e, a
76-
else
77-
; calculate the exponent
78-
push hl
79-
; 1023 + 31 = 1054 = 0x41E
80-
ld hl, $041E00
81-
ld c, l ; ld c, 0
82-
sbc hl, bc
83-
ld l, e ; (expon16 << (16 + 24)) | (mant48)
84-
ex de, hl
187+
; filter out exponent of $000 (zero) and $3FF (one)
188+
jr nc, __int_to_f64_zero_or_one
189+
; A is [-31, -1]
190+
add a, 52
191+
; A is [21, 51]
85192

86-
ld l, b
87-
pop bc
88-
ld a, e
89-
call __lshl
90-
push bc
91-
pop hl
92-
; shift by 32 is UB
93-
add hl, hl
94-
rla
95-
ld e, a
96-
end if
193+
require __int_to_f64_shl
97194

98-
; UDE:D has expon, E:UHL has mant
99-
; Float64_mant_bits - uint48_bits = 4
100-
ld c, 16 + 4
101-
push bc
195+
;-------------------------------------------------------------------------------
196+
197+
section .text
198+
199+
private __int_to_f64_shl
200+
__int_to_f64_shl:
; Left-shift path: A holds the shift count that moves the highest set bit of
; BC:UDE:UHL to bit 52. Builds the exponent in BC, then .finish applies the
; sign from the flags the entry point pushed.
; (Diff view: lines under an `N-` marker are removed by this commit.)
201+
; exponent = (1023 or $3FF or f64_bias) + base2_logarithm
202+
; Minimum exponent: $400 (2^1)
203+
; Maximum exponent: $433 (2^52), since 1023 + 52 = $433 (A = 51 at shift 0)
204+
; It is assumed that A is [0, 51] here, or [-52, -1] before adding 52
205+
push hl
206+
ld l, a
207+
ex (sp), hl ; (SP) = shift
102208
call __llshl
103-
pop af ; reset SP
209+
ex (sp), hl ; (SP) = shifted HL, L = shift
210+
211+
ld a, 51
212+
sub a, l ; A = 51 - shift = low byte of the biased exponent
213+
214+
; exponent = ($400 + (base2_logarithm - 1)) << 4
215+
; BC = $4EEM
216+
ld l, a
217+
ld h, $04
218+
; clear the implicit mantissa bit
219+
res 4, c ; 52 % 8 == 4
220+
add hl, hl
221+
add hl, hl
222+
add hl, hl
223+
add hl, hl
224+
ld a, l
225+
or a, c ; merge the exponent's low nibble with the top mantissa nibble
226+
ld c, a
227+
ld b, h
228+
pop hl ; restore shifted HL
104229
.finish:
; shared exit: the saved Z flag means the result stays positive
105230
pop af
106-
ret nc ; positive
231+
ret z
107232
set 7, b ; negative input: set the sign bit
108-
ret ; negative
109-
110-
.zero:
111-
; E:UHL and A are zero
112-
ex de, hl
113-
sbc hl, hl
114-
ld b, e
115-
ld c, e
116-
pop af
117233
ret
118234

235+
;-------------------------------------------------------------------------------
236+
119237
extern __ineg
120238
extern __lneg
121239
extern __lctlz
122-
extern __lshl
240+
extern __llctlz
123241
extern __llshl
242+
extern __llshru
243+
extern __llneg
244+
extern __lladd_1

src/crt/ulltod.src

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)