|
| 1 | +using Parsers |
| 2 | +using Parsers: AbstractConf, Result |
| 3 | + |
"""
    RoundThrows

Raises an `InexactError` if any rounding is necessary.

Accepted by the parsing functions in this file alongside `RoundNearest` and `RoundToZero`.
"""
const RoundThrows = RoundingMode{:Throw}()
| 10 | + |
# make our own conf struct to avoid specializing Parsers.typeparser on each unique precision value
# `T` is the integer type backing the FixedDecimal; the precision is carried as a runtime
# field rather than as a type parameter.
struct FixedDecimalConf{T<:Integer} <: AbstractConf{T}
    # number of decimal places (the `f` parameter of `FixedDecimal{T,f}`)
    f::Int
end
# Route parsing of any `FixedDecimal{T,f}` through our custom `FixedDecimalConf`,
# moving the precision `f` from the type domain into a runtime field.
function Parsers.conf(::Type{FixedDecimal{T,f}}, opts::Parsers.Options, kw...) where {T<:Integer,f}
    return FixedDecimalConf{T}(f)
end
# Our `typeparser` yields the backing integer rather than a FixedDecimal, so advertise
# the integer type `T` as the type that parsing returns.
function Parsers.returntype(::Type{FixedDecimal{T,f}}) where {T,f}
    return T
end
# Convert the `Result{IntegerType}` produced by `typeparser` into a `Result` for the
# FixedDecimal type: invalid results carry no value, valid ones reinterpret the integer.
function Parsers.result(FD::Type{FixedDecimal{T,f}}, res::Parsers.Result{T}) where {T,f}
    if Parsers.invalid(res.code)
        return Result{FD}(res.code, res.tlen)
    end
    return Result{FD}(res.code, res.tlen, reinterpret(FD, res.val))
end
# Signal to Parsers that FixedDecimals have a dedicated `typeparser`, so it does not
# need to fall back to `Base.tryparse`.
function Parsers.supportedtype(::Type{<:FixedDecimal})
    return true
end
| 26 | + |
# Pre-built option sets, one per supported rounding mode, selected by `_base_parse`.
const OPTIONS_ROUND_NEAREST = Parsers.Options(rounding=RoundNearest)
const OPTIONS_ROUND_TO_ZERO = Parsers.Options(rounding=RoundToZero)
# `rounding=nothing` stands for `RoundThrows`: `Parsers.scale` below substitutes
# `RoundThrows` when `options.rounding` is `nothing`.
const OPTIONS_ROUND_THROWS = Parsers.Options(rounding=nothing)
| 30 | + |
# Multiply `n` by 10^decpos, computed in `n`'s own type `T`.
# TODO: a lookup table per type would be faster
@inline function _shift(n::T, decpos) where {T}
    pow10 = T(10)^decpos
    return pow10 * n
end
| 33 | + |
# Shared BigInt constants used as operands of the in-place GMP calls in `_divpow10!`
const _BIGINT1 = BigInt(1)
const _BIGINT2 = BigInt(2)
const _BIGINT10 = BigInt(10)
const _BIGINT_10s = BigInt[] # buffer for the power-of-ten divisor in _divpow10!, accessed via `Parsers.access_threaded`
const _BIGINT_Rs = BigInt[] # buffer for "remainders" in _divpow10!, accessed via `Parsers.access_threaded`
| 39 | + |
# Define `_maxintdigits(::Type{T})` for every built-in fixed-width integer type:
# the number of decimal digits in `typemax(T)`, i.e. the number of digits an integer
# of type `T` can hold. The value is computed once here and spliced into each method
# as a literal.
for T in (Base.BitSigned_types..., Base.BitUnsigned_types...)
    @eval _maxintdigits(::Type{$T}) = $(ndigits(typemax(T)))
end
| 46 | + |
# Convert `v` to integer type `T` without range checks.
# Callers only pass non-negative `v`s.
function _unsafe_convert_int(::Type{T}, v::V) where {T<:Integer,V<:Integer}
    if sizeof(T) > sizeof(V)
        # target is wider: plain conversion
        return T(v)
    elseif sizeof(T) < sizeof(V)
        # target is narrower: truncate
        return unsafe_trunc(T, v)
    else
        # same width: reinterpret the bits
        return Base.bitcast(T, v)
    end
end
function _unsafe_convert_int(::Type{T}, v::BigInt) where {T<:Integer}
    return unsafe_trunc(T, v)
end
function _unsafe_convert_int(::Type{T}, v::T) where {T<:Integer}
    return v
end
| 55 | + |
# Return `true` when the non-negative magnitude `v`, negated if `neg`, does not fit in `T`.
function _check_overflows(::Type{T}, v::BigInt, neg::Bool) where {T<:Integer}
    if neg
        return -v < typemin(T)
    end
    return v > typemax(T)
end
function _check_overflows(::Type{T}, v::V, neg::Bool) where {T<:Integer,V<:Union{UInt64,UInt128}}
    # a strictly wider target type is reported as never overflowing
    sizeof(T) <= sizeof(V) || return false
    if neg
        # a negative value may have a magnitude of up to typemax(T) + 1
        return v > _unsafe_convert_int(V, typemax(T)) + one(V)
    end
    return v > typemax(T)
end
# Same type on both sides: nothing to check
function _check_overflows(::Type{T}, v::T, neg::Bool) where {T <: Integer}
    return false
end
| 63 | + |
# Compute `div(x, 10^pow, mode)` and return it together with `code`.
# With RoundThrows, `code` gets INEXACT or-ed in when the division leaves a remainder.
# Preconditions: x is non-negative, pow is >= 1
# The `!` signals that the BigInt methods mutate their argument in-place
function _divpow10!(x::T, code, pow, mode::RoundingMode) where {T}
    divisor = _shift(one(T), pow)
    quotient = div(x, divisor, mode)
    return quotient, code
end
function _divpow10!(x::T, code, pow, ::RoundingMode{:Throw}) where {T}
    divisor = _shift(one(T), pow)
    quotient, remainder = divrem(x, divisor)
    if remainder != 0
        code |= Parsers.INEXACT
    end
    return quotient, code
end
# BigInt specialization for round-to-nearest: divides `x` by 10^pow in place and returns
# `(x, code)`; `code` is passed through unchanged. Exact ties go to the even quotient.
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:Nearest})
    # adapted from https://github.com/JuliaLang/julia/blob/112554e1a533cebad4cb0daa27df59636405c075/base/div.jl#L217
    # `r` and `y` are scratch BigInts obtained from the `_BIGINT_Rs` / `_BIGINT_10s` buffers
    # (presumably one cached instance per thread -- confirm against `Parsers.access_threaded`)
    @inbounds r = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_Rs) # we must not yield here!
    @inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
    Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
    Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
    Base.GMP.MPZ.tdiv_qr!(x, r, x, y) # x, r = divrem(x, y)
    Base.GMP.MPZ.tdiv_q!(y, _BIGINT2) # y = div(y, 2)
    iseven(x) && Base.GMP.MPZ.add!(y, _BIGINT1) # y = y + iseven(x)
    # `y` is now the round-up threshold: half the divisor, plus one when the quotient is
    # even so that an exact tie leaves an even quotient untouched
    if r >= y
        Base.GMP.MPZ.add!(x, _BIGINT1) # x = x + (r >= y)
    end
    return x, code
end
# BigInt specialization for round-toward-zero: truncating division of `x` by 10^pow,
# performed in place. Returns `(x, code)` with `code` unchanged.
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:ToZero})
    # `y` is a scratch BigInt obtained from the `_BIGINT_10s` buffer
    @inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
    Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
    Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
    Base.GMP.MPZ.tdiv_q!(x, y) # x = div(x, y)
    return x, code
end
| 96 | + |
# BigInt specialization for RoundThrows: divides `x` by 10^pow in place and or-s
# INEXACT into `code` when the remainder is non-zero. Returns `(x, code)`.
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:Throw})
    # `y` is a scratch BigInt obtained from the `_BIGINT_10s` buffer
    @inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
    Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
    Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
    # the divisor buffer `y` is reused as the remainder output, so no second scratch value is needed
    Base.GMP.MPZ.tdiv_qr!(x, y, x, y) # x, y = divrem(x, y)
    y == 0 || (code |= Parsers.INEXACT)
    return x, code
end
| 105 | + |
# Rescale the digits we accumulated so far into an integer representing the decimal,
# i.e. the backing integer of a FixedDecimal with `conf.f` decimal places.
# Note the 2nd argument `FloatType` is used by Parsers.jl for _float_ parsing, but we can ignore it in the fixed decimal case
#
# Arguments used here:
# - `digits`: the accumulated non-negative digits; `exp`: the decimal exponent applied to them
# - `neg`: whether the parsed value is negative
# - `ndigits`: number of digits accumulated in `digits` (zero when all digits were zero)
# - `code`: return code so far; INEXACT, INVALID or OVERFLOW may be or-ed into it
# - `options.rounding`: the rounding mode, where `nothing` is treated as `RoundThrows`
# Returns `(backing_integer, code)`.
@inline function Parsers.scale(
    conf::FixedDecimalConf{T}, ::Parsers.FloatType, digits::V, exp, neg, code, ndigits, f::F, options::Parsers.Options
) where {T,V,F}
    rounding = something(options.rounding, RoundThrows)
    # Positive: how many trailing zeroes we need to add to our integer
    # Negative: how many digits are past our precision (we need to handle them in rounding)
    decimal_shift = conf.f + exp
    # Number of digits we need to accumulate including any trailing zeros or digits past our precision
    backing_integer_digits = ndigits + decimal_shift
    # Only a value with exactly as many digits as typemax(T) needs a precise overflow check:
    # fewer digits always fit, more digits never do
    may_overflow = backing_integer_digits == _maxintdigits(T)
    if iszero(ndigits)
        # all digits are zero
        i = zero(T)
    # The backing_integer_digits == 0 case is handled in the `else` (it means
    # that all the digits are past the precision but we might get `1` from rounding)
    elseif backing_integer_digits < 0
        # All digits are past our precision, no overflow possible, but we might get an inexact
        i = zero(T)
        (rounding === RoundThrows) && (code |= Parsers.INEXACT)
    elseif neg && (T <: Unsigned)
        # Unsigned types can't represent negative numbers
        i = _unsafe_convert_int(T, digits)
        code |= Parsers.INVALID
    elseif backing_integer_digits > _maxintdigits(T)
        i = _unsafe_convert_int(T, digits)
        # The number of digits to accumulate is larger than the capacity of T, we overflow
        # We don't check for inexact here because we already have an error
        code |= Parsers.OVERFLOW
    else
        if decimal_shift > 0
            r = _unsafe_convert_int(T, digits)
            i = _shift(r, decimal_shift)
            # A wrapped product is not necessarily smaller than `r` (e.g. for Int8,
            # 3 * 100 wraps around to 44), so comparing `r >= i` misses overflows.
            # Instead verify that the multiplication can be undone exactly. The power of
            # ten itself always fits in T here because decimal_shift < _maxintdigits(T).
            may_overflow && (div(i, _shift(one(T), decimal_shift)) != r) && (code |= Parsers.OVERFLOW)
        elseif decimal_shift < 0
            # Branch on the runtime rounding mode so each call dispatches on a concrete mode
            if rounding === RoundNearest
                r, code = _divpow10!(digits, code, -decimal_shift, RoundNearest)
            elseif rounding === RoundToZero
                r, code = _divpow10!(digits, code, -decimal_shift, RoundToZero)
            else
                r, code = _divpow10!(digits, code, -decimal_shift, RoundThrows)
            end
            # Now that the digits were rescaled we can check for overflow
            # can happen e.g. if digits were unsigned ints and our type is signed
            may_overflow && _check_overflows(T, r, neg) && (code |= Parsers.OVERFLOW)
            i = _unsafe_convert_int(T, r)
        else
            # No rescaling needed, the digits already are the backing integer
            may_overflow && _check_overflows(T, digits, neg) && (code |= Parsers.OVERFLOW)
            i = _unsafe_convert_int(T, digits)
        end
    end
    out = ifelse(neg, -i, i)
    return (out, code)
end
| 161 | + |
# Input with integer digits only (no fractional or exponent part): delegate to `scale`
# with an exponent of 0 so type conversion and overflow checks live in one place.
@inline function Parsers.noscale(conf::FixedDecimalConf{T}, digits::Integer, neg::Bool, code, ndigits, f::F, options::Parsers.Options) where {T,F}
    # `Parsers.FLOAT64` only fills the FloatType slot; the FixedDecimal `scale` ignores it
    return Parsers.scale(conf, Parsers.FLOAT64, digits, 0, neg, code, ndigits, f, options)
end
| 169 | + |
# This hooks into the floating point parsing machinery from Parsers.jl, where we also accumulate
# all the digits and note the effective exponent before we do "scaling" -- for FixedDecimals,
# the scaling means padding the backing integer with zeros or rounding them as necessary.
# We overloaded the "scale" and "noscale" methods to produce backing integers for FixedDecimals.
# We return a value of T -- i.e. the _integer_ backing the FixedDecimal, the reinterpret needs to happen later
#
# `b` is the first byte of the input. Returns `(pos, code, PosLen, x)` where `x` is the
# backing integer, or `zero(T)` when the input is rejected.
# Throws an `ArgumentError` for rounding modes other than `nothing`, RoundNearest,
# RoundToZero and RoundThrows.
@inline function Parsers.typeparser(conf::FixedDecimalConf{T}, source, pos, len, b, code, pl, options) where {T<:Integer}
    if !(options.rounding in (nothing, RoundNearest, RoundToZero, RoundThrows))
        throw(ArgumentError("Unhandled rounding mode $(options.rounding)"))
    end

    # remember where the value started; passed on to `parsedigits`
    startpos = pos
    # begin parsing
    neg = b == UInt8('-')
    if neg || b == UInt8('+')
        # step over the sign
        pos += 1
        Parsers.incr!(source)
        if Parsers.eof(source, pos, len)
            # a lone sign is not a number
            code |= Parsers.INVALID | Parsers.EOF
            x = zero(T)
            @goto done
        end
        b = Parsers.peekbyte(source, pos)
    else
        # Check if the input is empty
        if Parsers.eof(source, pos, len)
            code |= Parsers.INVALID | Parsers.EOF
            x = zero(T)
            @goto done
        end
    end

    # The subtraction is done on UInt8 values, so a byte below '0' wraps around to a
    # large value and this single comparison accepts exactly '0'..'9'
    if (b - UInt8('0')) <= 0x09 || b == options.decimal
        # digits are accumulated starting from a UInt64 zero
        x, code, pos = Parsers.parsedigits(conf, source, pos, len, b, code, options, UInt64(0), neg, startpos, true, 0, nothing)
    else
        x = zero(T)
        code |= Parsers.INVALID
    end
    @label done
    return pos, code, Parsers.PosLen(pl.pos, pos - pl.pos), x
end
| 210 | + |
# Shared implementation behind `Base.parse` and `Base.tryparse`: picks the pre-built
# options for `mode` and returns the raw result of parsing all of `source`.
# Throws an `ArgumentError` for unsupported rounding modes.
function _base_parse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
    options = if mode === RoundNearest
        OPTIONS_ROUND_NEAREST
    elseif mode === RoundToZero
        OPTIONS_ROUND_TO_ZERO
    elseif mode === RoundThrows
        OPTIONS_ROUND_THROWS
    else
        throw(ArgumentError("Unhandled rounding mode $mode"))
    end

    bytes = codeunits(source)
    return Parsers.xparse2(FD{T, f}, bytes, 1, length(bytes), options)
end
| 223 | + |
# Parse `source` as an `FD{T, f}`; returns `nothing` instead of throwing when the input
# is empty or the parse did not succeed on the complete string.
function Base.tryparse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
    if isempty(source)
        return nothing
    end
    res = _base_parse(FD{T, f}, source, mode)
    # Not reaching eof means there was trailing garbage after something number-like
    if Parsers.eof(res.code) && Parsers.ok(res.code)
        return res.val
    end
    return nothing
end
| 230 | + |
# Parse `source` as an `FD{T, f}`.
# Throws `ArgumentError` for empty or malformed input, `InexactError` when rounding was
# needed but not allowed, and `OverflowError` when the value does not fit.
function Base.parse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
    if isempty(source)
        throw(ArgumentError("Empty input is not allowed"))
    end
    res = _base_parse(FD{T, f}, source, mode)
    code = res.code
    if Parsers.inexact(code)
        throw(InexactError(:parse, FD{T, f}, source))
    elseif Parsers.overflow(code)
        throw(OverflowError("overflow parsing $(repr(source)) as $(FD{T, f})"))
    elseif !Parsers.eof(code) || Parsers.invalid(code)
        # Not reaching eof means there was trailing garbage after something number-like
        throw(ArgumentError("cannot parse $(repr(source)) as $(FD{T, f})"))
    end
    return res.val
end
0 commit comments