CE-Programming
diff --git a/‎src/libc/ldexpf.src
Lines changed: 145 additions & 160 deletions b/‎src/libc/ldexpf.src
Lines changed: 145 additions & 160 deletions
@@ -16,202 +16,187 @@ _scalbn := _ldexpf
 
 else
 
-if 1
+; Set to 1 to avoid returning negative zero on underflow (for TI's floats); set to 0 to keep the input's sign bit
+__ldexpf_avoid_negative_zero := 1
 
-; NOTE: since Ti floats are used, negative zero will not be returned unless the
-; input was negative zero.
-;
-; normal inputs are handled correctly, unless the output is subnormal
-; subnormal inputs/outputs return zero and set ERANGE and FE_INEXACT
-; zero/infinite/NaN inputs are handled correctly
-_ldexpf:
-_ldexp:
-_scalbnf:
+; ldexpf behaviour:
+; - signed zero, infinity, and NaN inputs are returned unmodified
+; - errno is set to ERANGE and FE_INEXACT is raised if a finite value becomes zero or infinite
+; - FE_INEXACT is raised if rounding occurred
+;-------------------------------------------------------------------------------
+
+	private	__ldexpf_helper
+__ldexpf_helper:
+.maybe_subnormal:
+	or	a, a	; clear carry (A is zero when reached through the jr z in _ldexpf)
+	adc	hl, bc	; BC is zero; only used to set Z when the doubled mantissa in HL is zero
+.ret_self:
+	ld	hl, (iy + 3)	; mant
+	ret	z	; return zero/inf/NaN (E already holds the input's top byte)
+	dec	bc	; BC is now -1
+; .subnormal_input:
+	; BC is -1 here
+	bit	7, (iy + 11)	; scale sign
+	jr	nz, .move_subnormal_down	; negative scale: shift the subnormal further right
+; .move_subnormal_up:
+	ld	a, e		; signbit
+	ld	de, (iy + 9)	; scale
+.norm_loop:
+	add	hl, hl	; shift the mantissa left; carry means the leading 1 was shifted out
+	jr	c, .normalized
+	ex	de, hl
+	add	hl, bc	; --scale
+	ex	de, hl
+	jr	c, .norm_loop	; carry is set unless scale was zero before the decrement
+; .still_subnormal:
+	; DE is -1 here
+	inc	e	; ld e, 0
+	jr	_ldexpf.finish_subnormal
+.normalized:
+	inc	de	; DE = remaining scale + 1
+	ex	de, hl	; HL = that value (treated as the exponent by .scale_up), DE = shifted mantissa
+	jr	_ldexpf.scale_up
+
+.move_subnormal_down:
+	; BC is -1 here
+	; first we need to test that the result won't be zero
+	call	__ictlz	; NOTE(review): presumably counts the leading zeros of HL into A - confirm against __ictlz
+	; A is [1, 23]
+	; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
+	sub	a, 24	; A is [-23, -1]
+	ld	c, a	; sign extend A (the upper bytes of BC are already $FFFF)
+	ld	hl, (iy + 9)	; scale
+	ld	a, l	; low byte of scale
+	or	a, a	; clear carry for the sbc below
+	sbc	hl, bc	; carry (borrow) is set when scale < clz_result - 24
+	cpl	; A = ~low(scale) = -scale - 1; .shru_common adds one back to get the shift count
+	jr	nc, _ldexpf.shru_common	; enough bits remain: shift right and round
+.underflow_to_zero:
+	xor	a, a	; A = 0 and carry cleared (selects the underflow branch below)
+	ld	b, a	; ld b, 0
+if __ldexpf_avoid_negative_zero
+	res	7, (iy + 6)	; clear the sign bit of the stacked argument so the result is +0
+end if
+.overflow_to_inf:	; <-- Carry is set when inf/NaN
+	ld	hl, 5	; ERANGE
+	ld	(_errno), hl
+	ld	l, h	; ld l, 0
+	ex	de, hl	; DE = 0 (mantissa of the zero result)
+	jr	nc, _ldexpf.underflow_hijack	; carry clear: underflow, return zero
+	ld	de, $800000	; low three bytes of infinity
+	ld	b, $7F	; OR-ed into the top byte of the result after the sign is extracted
+	jr	_ldexpf.overflow_hijack
+
+;-------------------------------------------------------------------------------
+; When the input and output are normal:
+; scaling up  : 60F + 12R + 4W + 2
+; scaling down: 60F + 12R + 4W + 4
 _scalbn:
+_scalbnf:
+_ldexp:
+_ldexpf:
 	ld	iy, 0
 	lea	bc, iy + 0	; BC = 0 (IY is still zero here)
 	add	iy, sp	; IY = SP, so (iy + N) addresses the stacked arguments
 	ld	hl, (iy + 3)	; mant
 	add	hl, hl	; shift the low 24 bits left; carry = their old bit 23
 	ld	a, (iy + 6)	; expon
+	ld	e, a		; signbit
 	adc	a, a	; A = (A << 1) + carry = 8-bit exponent; the sign bit moves into carry
-	jr	z, .maybe_subnormal
+	jr	z, __ldexpf_helper.maybe_subnormal
+	ld	c, a	; BC = exponent (the upper bytes of BC are still zero)
 	inc	a	; Z when the exponent was $FF
-	jr	z, .ret_self	; inf NaN
-	dec	a
+	jr	z, __ldexpf_helper.ret_self	; inf NaN
+	ld	a, e		; signbit
 	ex	de, hl	; DE = doubled mantissa
 	ld	hl, (iy + 9)	; scale
-	ld	c, a
 	add	hl, bc	; add expon
-	ld	a, l
 	bit	7, (iy + 11)	; scale sign
-	jr	z, .scale_up
-.scale_down:
-	; HL is not INT_MIN here
-	dec	hl
-	add	hl, hl
-	jr	nc, .finish	; expon > 0
-	; expon <= 0 or subnormal
-.underflow_to_zero:
-	ld	hl, ___fe_cur_env
-	set	5, (hl)	; FE_INEXACT
-if 0
-	ld	a, (iy + 6)
-	and	a, $80	; copysign
-else
-	xor	a, a	; avoid returning negative zero with Ti's floats
-end if
-	sbc	hl, hl
-.common_erange:
-	ld	bc, 5	; ERANGE
-	ld	(_errno), bc
-	ld	e, a
-	ret
-.overflow_to_inf:
-	ld	hl, $800000
-	ld	a, (iy + 6)
-	or	a, $7F	; copysign
-	jr	.common_erange
-
+	jr	nz, .scale_down	; negative scale
 .scale_up:
-	ld	bc, -255
+	ld	bc, -255	; $FFFF01
 	add	hl, bc	; carry when expon + scale >= 255
-	jr	c, .overflow_to_inf
-.finish:
-	ld	l, a
+	jr	c, __ldexpf_helper.overflow_to_inf
+	; sbc	hl, bc	; restore hl
+	dec	l	; we only care about the low 8 bits
 	ex	de, hl	; HL = doubled mantissa, E = low byte of the new exponent
-	; signbit(A) E:UHL >>= 1
-	ld	a, (iy + 6)	; expon
+.finish_subnormal:
 	push	hl	; store HL at (iy - 3)..(iy - 1) so its upper byte can be rotated in memory
-	rla
+.finish:
+	rla	; extract signbit
 	rr	e	; E = sign bit followed by exponent bits 7..1; carry = exponent bit 0
 	rr	(iy - 1)	; rotate the upper byte of the stacked mantissa
 	pop	hl
 	rr	h
 	rr	l
 	ret	; result is in E:UHL
 
-.maybe_subnormal:
-	dec	bc	; BC is now -1
-	add	hl, bc
-	jr	c, .underflow_to_zero
-	; return zero
-.ret_self:
-	ld	hl, (iy + 3)
-	ld	e, (iy + 6)
-	ret
-
-else
-
-; normal inputs are handled correctly, unless the output is subnormal
-; subnormal inputs are handled correctly for positive scaling values
-; subnormal outputs return zero and set ERANGE and FE_INEXACT for negative scaling values
-; zero/infinite/NaN inputs are handled correctly
-_ldexpf:
-_ldexp:
-_scalbnf:
-_scalbn:
-	ld	iy, 0
-	lea	bc, iy + 0
-	add	iy, sp
-	ld	hl, (iy + 3)	; mant
-	add	hl, hl
-	ld	e, (iy + 6)	; expon
-	ld	a, e
-	rl	e
-	jr	z, .maybe_subnormal
-	inc	e
-	jr	z, .ret_self	; inf NaN
-	dec	e
-	ld	c, e
-	ex	de, hl
-	ld	hl, (iy + 9)	; scale
-	add	hl, bc	; add expon
-	ld	a, l
-	bit	7, (iy + 11)	; scale sign
-	jr	z, .scale_up
+;-------------------------------------------------------------------------------
 .scale_down:
-	; test signbit
-	push	hl
+	push	de	; save the doubled mantissa (mant << 1)
+	ld	e, l	; E = low byte of expon + scale (new exponent, or negated below into the shift amount)
 	; HL is not INT_MIN here
 	dec	hl
 	add	hl, hl	; carry = sign bit of (expon + scale - 1)
-	pop	hl
 	jr	nc, .finish	; expon > 0
-	; expon <= 0 or subnormal
-;	jr	.underflow_to_zero
-.underflow_to_zero:
+;-------------------------------------------------------------------------------
+.shru_to_subnormal:
+	xor	a, a
+	sub	a, e	; A = -(expon + scale), low 8 bits
+	pop	de	; undo the push from .scale_down
+	ld	c, 48	; ld bc, 24 << 1
+	add	hl, bc	; carry when expon + scale >= -23
+	jr	nc, __ldexpf_helper.underflow_to_zero	; too small to represent: return zero
+
+	set	7, (iy + 5)	; set implicit mantissa bit
+.shru_common:
+	; A should be [0, 23]
+	ld	b, a	; B = loop counter (incremented below)
+	ld	hl, (iy + 3)	; mantissa
+	push	hl	; ld (iy - 3), hl
+	xor	a, a	; A = 0 and carry cleared before the first adc
+	inc	b
+	; shift amount will be [1, 24]
+	ld	c, a	; ld c, 0
+	ld	d, (iy - 1)	; D = upper byte of the mantissa just pushed
+.shru_loop:
+	adc	a, c	; collect sticky bits: A += carry from the previous shift (C is zero)
+	srl	d	; shift D:H:L right by one
+	rr	h
+	rr	l	; carry = bit shifted out
+	djnz	.shru_loop
+	ld	(iy - 1), d	; write the shifted upper byte back into the stacked copy
+	pop	de	; upper byte of DE = shifted upper byte
+	ld	d, h
+	ld	e, l	; DE = shifted 24-bit mantissa
+
+	; round upwards to even if (round && (guard || sticky))
+	jr	nc, .no_round	; carry = last bit shifted out (round bit)
+	; be careful not to touch the carry flag
+	inc	a
+	dec	a
+	jr	nz, .round_up	; sticky bits present
+	bit	0, e	; test guard bit (lowest kept bit)
+	jr	z, .no_round
+.round_up:
+	inc	de	; round upwards to even (won't overflow)
+.no_round:
+	adc	a, a	; A = 2 * A + carry: nonzero when any set bit was shifted out
+	jr	z, .result_is_exact
+.underflow_hijack:
+.overflow_hijack:
 	ld	hl, ___fe_cur_env
 	set	5, (hl)	; FE_INEXACT
-	ld	a, (iy + 6)
+.result_is_exact:
+	ld	a, (iy + 6)	; get signbit
+	ex	de, hl	; HL = low 24 bits of the result
 	and	a, $80	; copysign
-	sbc	hl, hl
-.common_erange:
-	ld	bc, 5	; ERANGE
-	ld	(_errno), bc
+	or	a, b	; used for the overflow to infinite path
 	ld	e, a	; E = top byte of the result
 	ret
-.overflow_to_inf:
-	ld	hl, $800000
-	ld	a, (iy + 6)
-	or	a, $7F	; copysign
-	jr	.common_erange
 
-.scale_up:
-	ld	bc, -255
-	add	hl, bc
-	jr	c, .overflow_to_inf
-.finish:
-	ld	l, a
-	ex	de, hl
-.finish_subnormal:
-	; signbit(A) E:UHL >>= 1
-	ld	a, (iy + 6)	; expon
-	push	hl
-	rla
-	rr	e
-	rr	(iy - 1)
-	pop	hl
-	rr	h
-	rr	l
-	ret
-
-.maybe_subnormal:
-	dec	bc	; BC is now -1
-	add	hl, bc
-;	jr	c, .underflow_to_zero
-	jr	c, .subnormal
-	; return zero
-.ret_self:
-	ld	hl, (iy + 3)
-	ld	e, (iy + 6)
-	ret
-
-.subnormal:
-	; BC is -1 here
-	bit	7, (iy + 11)	; scale sign
-	jr	nz, .underflow_to_zero
-	ld	de, (iy + 9)	; scale
-	ld	hl, (iy + 3)	; mant
-.norm_loop:
-	add	hl, hl
-	jr	c, .normalized
-	ex	de, hl
-	add	hl, bc	; --scale
-	ex	de, hl
-	jr	c, .norm_loop
-; .still_subnormal:
-	ld	e, 0
-	jr	.finish_subnormal
-.normalized:
-	inc	de
-	ex	de, hl
-	ld	a, l
-	jr	.scale_up
-
-end if
-
-	extern	___fe_cur_env
 	extern	_errno
+	extern	___fe_cur_env
+	extern	__ictlz
 
 end if