Skip to content

Commit 91bf11c

Browse files
calc84maniac, mateoconlechuga
authored and committed
Optimize 48-bit division
1 parent 3d69ce4 commit 91bf11c

File tree

1 file changed

+68
-99
lines changed

1 file changed

+68
-99
lines changed

src/crt/i48dvrmu.src

Lines changed: 68 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,120 +1,89 @@
11
; Performs 48-bit division and modulo
22
;
33
; Arguments:
4-
; ude:uhl = numerator
5-
; uiy:ubc = denominator
4+
; ude:uhl = dividend
5+
; uiy:ubc = divisor
66
;
77
; Returns (loaded into other registers by i48divu/s and i48remu/s):
88
; ude:uhl quotient, uiy:ubc remainder
99

10-
assume adl=1
10+
assume adl=1
1111

12-
section .data
12+
section .data
1313

14-
section .text
15-
public __i48dvrmu
14+
section .text
15+
public __i48dvrmu
1616

1717
__i48dvrmu:
18-
;backup af
19-
push af
18+
;backup af (restored by the final pop af before ret)
19+
push af
2020

21-
push ix
22-
ld ix, 0
23-
add ix, sp
24-
;backup interrupt
25-
ld a, i ; P = IEF2
26-
di
27-
push af
21+
;backup interrupt state and disable interrupts (NOTE(review): presumably because the exx shadow registers are used below - confirm)
22+
ld a, i ; P = IEF2
23+
di
24+
push af ; saved flags are tested by jp po near the end to decide whether to ei
2825

29-
exx
30-
ld hl, 0
31-
exx
26+
; save dividend to stack (6 bytes; each byte is overwritten with a quotient byte once consumed)
27+
push de ; high 3 dividend bytes (ude)
28+
push hl ; low 3 dividend bytes (uhl)
3229

33-
push de
34-
push hl
35-
36-
;Stack Use:
37-
; ix-1 : 0
38-
; ix-2 : a interrupt stuff
39-
; ix-3 : f interrupt stuff
40-
; ix-4 : deu n[5]
41-
; ix-5 : d n[4]
42-
; ix-6 : e n[3]
43-
; ix-7 : hlu n[2]
44-
; ix-8 : h n[1]
45-
; ix-9 : l n[0]
46-
47-
; Set up for c quotient, uhl':uhl remainder, and ude':ude denom
48-
49-
; ude = low denom
50-
ld de, 0
51-
ex de, hl
52-
add hl, bc
53-
ex de, hl
54-
55-
exx
56-
lea de, iy+0 ; ude' = iy = high denom
57-
exx
58-
59-
ld bc, $000600 ; b = byte counter
60-
ld iy, 0
61-
ld hl, 0
30+
; Set up for ubc' dividend/quotient ptr, uhl':uhl remainder, and ude':ude divisor
31+
push bc ; copy ubc to ude by way of the stack
32+
pop de ; ude = low divisor
33+
ld hl, 6 ; 6 = number of dividend bytes pushed above
34+
ld c, l ; c = byte counter
35+
add hl, sp ; uhl = address just past the dividend on the stack; carry is expected to be clear afterwards
36+
push hl ; pass the pointer to the alternate register set (popped into ubc' below)
37+
sbc hl, hl ; uhl = low remainder = 0 (relies on carry being clear)
38+
exx ; switch to the alternate register set
39+
pop bc ; ubc' = dividend/quotient pointer
40+
lea de, iy ; ude' = iy = high divisor
41+
sbc hl, hl ; uhl' = high remainder = 0 (pop and lea leave the carry clear)
6242
.byteLoop:
63-
exx
64-
ld b, 8 ; b' = bit counter
65-
ld a, (ix-4); a = relevant numerator byte
43+
dec bc ; step down to the next dividend byte, most significant byte first
44+
ld a, (bc); a = relevant dividend byte
45+
exx ; back to the main register set for the bit loop
46+
ld b, 8 ; b = bit counter
6647
.bitLoop:
67-
exx ; loop entered with alt registers as b' is bit counter, exx here to not be in alt
68-
sla c ; q<<1
69-
; r<<1 and r[0] = n[b]
70-
rla ; c = n[b]
71-
adc hl, hl ; low r += low r + n[b]
72-
exx
73-
adc hl, hl ; high r += high r + c from low r
74-
exx
75-
; r - d
76-
sbc hl, de ; low r -= low d
77-
exx
78-
sbc hl, de ; high r -= high d
79-
exx
80-
jr nc, .greaterEqual
81-
; restore if r<d
82-
add hl, de ; restore low
83-
exx
84-
adc hl, de ; restore high
85-
djnz .bitLoop
86-
jr .postBit
48+
; r<<1 and r[0] = n[b]
49+
rla ; carry out = n[b]; carry in (the inverted previous quotient bit, ~q[b-1]) enters bit 0 of a
50+
adc hl, hl ; low r += low r + n[b]
51+
exx
52+
adc hl, hl ; high r += high r + c from low r
53+
exx
54+
; r - d
55+
sbc hl, de ; low r -= low d
56+
exx
57+
sbc hl, de ; high r -= high d
58+
exx
59+
jr nc, .greaterEqual ; no borrow: r >= d, keep the subtraction (carry clear = quotient bit 1)
60+
; restore if r<d
61+
add hl, de ; restore low
62+
exx
63+
adc hl, de ; restore high; the add-back leaves carry set (= quotient bit 0)
64+
exx
8765
.greaterEqual:
88-
; keep r -= d, and update quotient
89-
inc c
90-
exx
91-
djnz .bitLoop
92-
.postBit:
93-
exx
94-
ld (ix-4), c ; now that numerator byte isn't needed, overwrite with quotient
95-
dec ix
96-
djnz .byteLoop
97-
98-
;finish and clean up
99-
100-
; iy = remainder high
101-
ld iy, 0
102-
exx
103-
ex de, hl
104-
add iy, de
105-
exx
66+
; carry contains inverted quotient bit, which is saved by the rla of the next iteration (or by the rla after the loop)
67+
djnz .bitLoop
68+
rla ; ~q[b] = c; this ninth rotate also shifts out the stale carry taken in by the first rla of the byte
69+
cpl ; uninvert quotient
70+
dec c ; decrement byte counter (sets Z after the last byte; carry is not affected)
71+
exx ; to the alternate register set, where ubc' holds the byte pointer
72+
ld (bc), a ; now that dividend byte isn't needed, overwrite with quotient
73+
jr nz, .byteLoop ; tests Z from dec c above (exx and ld do not change flags)
10674

107-
; bc = remainder low
108-
push hl
109-
pop bc
75+
;finish and clean up
11076

111-
pop hl ; hl = lower quotient
112-
pop de ; de = upper quotient
77+
push hl ; uhl' = high remainder (the alternate register set is active here)
78+
exx ; back to the main register set
79+
pop iy ; iy = remainder high
80+
ex (sp), hl ; hl = lower quotient, top of stack = remainder low
81+
pop bc ; bc = remainder low
82+
pop de ; de = upper quotient
11383

114-
pop af
115-
jp po, .skipEI
116-
ei
84+
pop af ; P/V = IEF2 as saved on entry
85+
jp po, .skipEI ; interrupts were disabled on entry: leave them disabled
86+
ei
11787
.skipEI:
118-
pop ix
119-
pop af
120-
ret
88+
pop af ; restore the caller's af
89+
ret

0 commit comments

Comments
 (0)