diff --git a/src/crt/cttz.src b/src/crt/cttz.src new file mode 100644 index 000000000..6e938859f --- /dev/null +++ b/src/crt/cttz.src @@ -0,0 +1,128 @@ + assume adl=1 + + section .text + public __bcttz +__bcttz: + cp a, 1 + require __cttz_common + + section .text + private __cttz_common + ; Input: A=byte, CF=(A==0) + ; Output: A=cttz(A) +__cttz_common: + adc a, a + add a, a + jr z, .high2 + add a, a + add a, a + jr z, .high4 + add a, a + add a, a + jr z, .high6 + add a, a + add a, a + sbc a, -1 + ret p + ld a, 8 + ret +.high6: + sbc a, -3 + ret +.high4: + sbc a, -5 + ret +.high2: + sbc a, -7 + ret + + section .text + public __scttz +__scttz: + xor a, a + or a, l + jr nz, __cttz_common + sub a, h + ccf + require __scttz.hijack + + section .text + private __scttz.hijack +__scttz.hijack: + call __cttz_common + add a, 8 + ret + + section .text + public __icttz +__icttz: + xor a, a + require __icttz.hijack + + section .text + private __icttz.hijack +__icttz.hijack: + or a, l + jr nz, __cttz_common + or a, h + jr nz, __scttz.hijack + dec sp + push hl + inc sp + pop af + call __bcttz + add a, 16 + ret + + section .text + public __lcttz +__lcttz: + add hl, de + xor a, a + sbc hl, de + jr nz, __icttz.hijack + sub a, e + ccf + call __cttz_common + add a, 24 + ret + + section .text + public __llcttz +__llcttz: + add hl, de + xor a, a + sbc hl, de + jr nz, __icttz.hijack + ex de, hl + sbc hl, de + jr nz, __i48cttz.hijack + or a, c + jr nz, .low + sub a, b + ccf + call __cttz_common + add a, 56 + ret +.low: + call __cttz_common + add a, 48 + ret + + section .text + public __i48cttz +__i48cttz: + add hl, de + xor a, a + sbc hl, de + jr nz, __icttz.hijack + ex de, hl + require __i48cttz.hijack + + section .text + private __i48cttz.hijack +__i48cttz.hijack: + call __icttz.hijack + ex de, hl + add a, 24 + ret diff --git a/src/libc/ez80_builtin.src b/src/libc/ez80_builtin.src index 004e724c3..e567b1cc3 100644 --- a/src/libc/ez80_builtin.src +++ b/src/libc/ez80_builtin.src @@ -6,18 +6,10 @@ public ___ez80_ctzc ___ez80_ctzc: - ; unoptimized ld hl, 3 add hl, sp - ld l, (hl) - xor a, a - sub a, l - and a, l - call __bctlz - bit 3, a - ret nz - xor a, 7 - ret + ld a, (hl) + jp __bcttz ;------------------------------------------------------------------------------- @@ -25,25 +17,14 @@ ___ez80_ctzc: public ___ez80_ctzi48 ___ez80_ctzi48: - ; unoptimized - ld hl, 3 + ld hl, 6 add hl, sp - ld bc, (hl) - inc hl - inc hl - inc hl - ld iy, (hl) - sbc hl, hl - add hl, bc - lea de, iy - call __i48neg - call __i48and - call __i48ctlz - cpl - add a, 48 - ret p - ld a, 48 - ret + ld de, (hl) + dec hl + dec hl + dec hl + ld hl, (hl) + jp __i48cttz ;------------------------------------------------------------------------------- @@ -51,16 +32,12 @@ ___ez80_ctzi48: public ___ez80_ffsc ___ez80_ffsc: - ; unoptimized ld hl, 3 add hl, sp - ld l, (hl) - xor a, a - sub a, l - and a, l - call __bctlz - cpl - add a, 9 + ld a, (hl) + or a, a + rla + jp nz, __bcttz ret ;------------------------------------------------------------------------------- @@ -69,24 +46,13 @@ ___ez80_ffsc: public ___ez80_ffss ___ez80_ffss: - ; unoptimized ld hl, 3 add hl, sp ld hl, (hl) - ; HL & -HL - ld b, h - ld c, l - sbc hl, hl - sbc hl, bc ld a, h - and a, b - ld h, a - ld a, l - and a, c - ld l, a - call __sctlz - cpl - add a, 17 + or a, l + add hl, hl + jp nz, __scttz ret ;------------------------------------------------------------------------------- @@ -95,22 +61,18 @@ ___ez80_ffss: public ___ez80_ffsi48 ___ez80_ffsi48: - ; unoptimized - ld hl, 3 + ld hl, 6 add hl, sp - ld bc, (hl) - inc hl - inc hl - inc hl - ld iy, (hl) - sbc hl, hl - add hl, bc - lea de, iy - call __i48neg - call __i48and - call __i48ctlz - cpl - add a, 49 + ld de, (hl) + dec hl + dec hl + dec hl + ld hl, (hl) + call __i48cttz + inc a + cp a, 49 + ret nz + xor a, a ret ;------------------------------------------------------------------------------- @@ -406,13 +368,15 @@ ___ez80_rotateright48: extern __snot extern __i48not - extern __i48and - extern __i48neg extern __bctlz extern __sctlz extern __i48ctlz + extern __bcttz + extern __scttz + extern __i48cttz + extern __bpopcnt extern __spopcnt extern __i48popcnt diff --git a/test/standalone/ez80_builtin/src/main.c b/test/standalone/ez80_builtin/src/main.c index 30e97e53f..4ef4312ca 100644 --- a/test/standalone/ez80_builtin/src/main.c +++ b/test/standalone/ez80_builtin/src/main.c @@ -121,7 +121,7 @@ static int test_ctzc(void) { unsigned char input; for (int i = 0; i < 256; i++) { input = (unsigned char)i; - truth = (input == 0) ? 8 : __builtin_ctz((unsigned int)input); + truth = (input == 0) ? 8 : __builtin_ctz((unsigned int)input | INT_MIN); guess = __ez80_ctzc(input); CMP("%d", input, truth, guess); } @@ -271,7 +271,7 @@ static int test_ctzi48(void) { CMP("%012llX", (uint64_t)UINT48_MAX, 0, __ez80_ctzi48(UINT48_MAX)); for (int i = 0; i < RANDOM_TEST_COUNT; i++) { input = rand48(); - truth = (input == 0) ? 48 : __builtin_ctzll((unsigned long long)input); + truth = (input == 0) ? 48 : __builtin_ctzll((unsigned long long)input | INT64_MIN); guess = __ez80_ctzi48(input); CMP("%012llX", (uint64_t)input, truth, guess); }