Skip to content

Commit 75c7226

Browse files
authored
Merge pull request #80 from Drvi/td-new-parser
Use a `Parsers.jl`-based parser implementation
2 parents 8233b75 + f9a7bd8 commit 75c7226

File tree

6 files changed

+986
-226
lines changed

6 files changed

+986
-226
lines changed

.github/workflows/CI.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
fail-fast: false
1414
matrix:
1515
version:
16-
- '1.0'
16+
- '1.6'
1717
- '1'
1818
# - 'nightly'
1919
os:

Project.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
name = "FixedPointDecimals"
22
uuid = "fb4d412d-6eee-574d-9565-ede6634db7b0"
33
authors = ["Fengyang Wang <fengyang.wang.0@gmail.com>", "Curtis Vogt <curtis.vogt@gmail.com>"]
4-
version = "0.4.2"
4+
version = "0.4.3"
5+
6+
[deps]
7+
Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
58

69
[compat]
10+
Parsers = "2.7"
711
julia = "1.6"
812

913
[extras]

src/FixedPointDecimals.jl

Lines changed: 11 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ module FixedPointDecimals
2828
export FixedDecimal, RoundThrows
2929

3030
using Base: decompose, BitInteger
31+
import Parsers
3132

3233
# floats that support fma and are roughly IEEE-like
3334
const FMAFloat = Union{Float16, Float32, Float64, BigFloat}
@@ -100,6 +101,16 @@ end
100101

101102
const FD = FixedDecimal
102103

104+
include("parse.jl")
105+
106+
function __init__()
107+
nt = isdefined(Base.Threads, :maxthreadid) ? Threads.maxthreadid() : Threads.nthreads()
108+
# Buffers used in parsing when dealing with BigInts, see _divpow10! in parse.jl
109+
resize!(empty!(_BIGINT_10s), nt)
110+
resize!(empty!(_BIGINT_Rs), nt)
111+
return
112+
end
113+
103114
(::Type{T})(x::Real) where {T <: FD} = convert(T, x)
104115

105116
floattype(::Type{<:FD{T}}) where {T<:Union{Int8, UInt8, Int16, UInt16}} = Float32
@@ -413,78 +424,6 @@ function Base.show(io::IO, x::FD{T, f}) where {T, f}
413424
end
414425
end
415426

416-
# parsing
417-
418-
"""
419-
RoundThrows
420-
421-
Raises an `InexactError` if any rounding is necessary.
422-
"""
423-
const RoundThrows = RoundingMode{:Throw}()
424-
425-
function Base.parse(::Type{FD{T, f}}, str::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
426-
if !(mode in (RoundThrows, RoundNearest, RoundToZero))
427-
throw(ArgumentError("Unhandled rounding mode $mode"))
428-
end
429-
430-
# Parse exponent information
431-
exp_index = something(findfirst(==('e'), str), 0)
432-
if exp_index > 0
433-
exp = parse(Int, str[(exp_index + 1):end])
434-
sig_end = exp_index - 1
435-
else
436-
exp = 0
437-
sig_end = lastindex(str)
438-
end
439-
440-
# Remove the decimal place from the string
441-
sign = T(first(str) == '-' ? -1 : 1)
442-
dec_index = something(findfirst(==('.'), str), 0)
443-
sig_start = sign < 0 ? 2 : 1
444-
if dec_index > 0
445-
int_str = str[sig_start:(dec_index - 1)] * str[(dec_index + 1):sig_end]
446-
exp -= sig_end - dec_index
447-
else
448-
int_str = str[sig_start:sig_end]
449-
end
450-
451-
# Split the integer string into the value we can represent inside the FixedDecimal and
452-
# the remaining digits we'll use during rounding
453-
int_end = lastindex(int_str)
454-
pivot = int_end + exp - (-f)
455-
456-
a = rpad(int_str[1:min(pivot, int_end)], pivot, '0')
457-
b = lpad(int_str[max(pivot, 1):int_end], int_end - pivot + 1, '0')
458-
459-
# Parse the strings
460-
val = isempty(a) ? T(0) : sign * parse(T, a)
461-
if !isempty(b) && any(!isequal('0'), b[2:end])
462-
if mode == RoundThrows
463-
throw(InexactError(:parse, FD{T, f}, str))
464-
elseif mode == RoundNearest
465-
val += sign * parse_round(T, b, mode)
466-
end
467-
end
468-
469-
reinterpret(FD{T, f}, val)
470-
end
471-
472-
function parse_round(::Type{T}, fractional::AbstractString, ::RoundingMode{:Nearest}) where T
473-
# Note: parsing each digit individually ensures we don't run into an OverflowError
474-
digits = Int8[parse(Int8, d) for d in fractional]
475-
for i in length(digits):-1:2
476-
if digits[i] > 5 || digits[i] == 5 && isodd(digits[i - 1])
477-
if i - 1 == 1
478-
return T(1)
479-
else
480-
digits[i - 1] += 1
481-
end
482-
end
483-
end
484-
return T(0)
485-
end
486-
487-
488427
"""
489428
max_exp10(T)
490429

src/parse.jl

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
using Parsers
2+
using Parsers: AbstractConf, Result
3+
4+
"""
5+
RoundThrows
6+
7+
Raises an `InexactError` if any rounding is necessary.
8+
"""
9+
const RoundThrows = RoundingMode{:Throw}()
10+
11+
# make our own conf struct to avoid specializing Parsers.typeparser on each unique precision value
12+
struct FixedDecimalConf{T<:Integer} <: AbstractConf{T}
13+
f::Int
14+
end
15+
# This overload says that when parsing a FixedDecimal type, use our new custom FixedDecimalConf type
16+
Parsers.conf(::Type{FixedDecimal{T,f}}, opts::Parsers.Options, kw...) where {T<:Integer,f} = FixedDecimalConf{T}(f)
17+
# Because the value returned from our `typeparser` isn't a FixedDecimal, we overload here to show we're returning an integer type
18+
Parsers.returntype(::Type{FixedDecimal{T,f}}) where {T,f} = T
19+
# This overload allows us to take the Result{IntegerType} returned from typeparser and turn it into a FixedDecimal Result
20+
function Parsers.result(FD::Type{FixedDecimal{T,f}}, res::Parsers.Result{T}) where {T,f}
21+
return Parsers.invalid(res.code) ? Result{FD}(res.code, res.tlen) :
22+
Result{FD}(res.code, res.tlen, reinterpret(FD, res.val))
23+
end
24+
# Tell Parsers that we can use our custom typeparser and not rely on Base.tryparse
25+
Parsers.supportedtype(::Type{<:FixedDecimal}) = true
26+
27+
const OPTIONS_ROUND_NEAREST = Parsers.Options(rounding=RoundNearest)
28+
const OPTIONS_ROUND_TO_ZERO = Parsers.Options(rounding=RoundToZero)
29+
const OPTIONS_ROUND_THROWS = Parsers.Options(rounding=nothing)
30+
31+
# TODO: a lookup table per type would be faster
32+
# Scale `n` by 10^decpos, with the power of ten and the product computed in `n`'s own type `T`.
@inline _shift(n::T, decpos) where {T} = n * T(10)^decpos
33+
34+
const _BIGINT1 = BigInt(1)
35+
const _BIGINT2 = BigInt(2)
36+
const _BIGINT10 = BigInt(10)
37+
const _BIGINT_10s = BigInt[] # buffer for the power of ten (`10^pow`) in _divpow10!, accessed via `Parsers.access_threaded`
38+
const _BIGINT_Rs = BigInt[] # buffer for "remainders" in _divpow10!, accessed via `Parsers.access_threaded`
39+
40+
for T in (Base.BitSigned_types..., Base.BitUnsigned_types...)
41+
let bytes = Tuple(codeunits(string(typemax(T))))
42+
# The number of digits an integer of type T can hold
43+
@eval _maxintdigits(::Type{$T}) = $(length(bytes))
44+
end
45+
end
46+
47+
# All `v`s are non-negative
48+
function _unsafe_convert_int(::Type{T}, v::V) where {T<:Integer,V<:Integer}
49+
return sizeof(T) > sizeof(V) ? T(v) :
50+
sizeof(T) < sizeof(V) ? unsafe_trunc(T, v) :
51+
Base.bitcast(T, v)
52+
end
53+
_unsafe_convert_int(::Type{T}, v::BigInt) where {T<:Integer} = unsafe_trunc(T, v)
54+
_unsafe_convert_int(::Type{T}, v::T) where {T<:Integer} = v
55+
56+
function _check_overflows(::Type{T}, v::BigInt, neg::Bool) where {T<:Integer}
57+
return neg ? -v < typemin(T) : v > typemax(T)
58+
end
59+
function _check_overflows(::Type{T}, v::V, neg::Bool) where {T<:Integer,V<:Union{UInt64,UInt128}}
60+
return sizeof(T) <= sizeof(V) && (neg ? v > _unsafe_convert_int(V, typemax(T)) + one(V) : v > typemax(T))
61+
end
62+
_check_overflows(::Type{T}, v::T, neg::Bool) where {T <: Integer} = false
63+
64+
# `x = div(x, 10^pow, mode)`; may set code |= INEXACT for RoundThrows
65+
# x is non-negative, pow is >= 1
66+
# `!` to signal we mutate bigints in-place
67+
function _divpow10!(x::T, code, pow, mode::RoundingMode) where {T}
68+
return div(x, _shift(one(T), pow), mode), code
69+
end
70+
function _divpow10!(x::T, code, pow, ::RoundingMode{:Throw}) where {T}
71+
q, r = divrem(x, _shift(one(T), pow))
72+
r == 0 || (code |= Parsers.INEXACT)
73+
return q, code
74+
end
75+
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:Nearest})
76+
# adapted from https://github.com/JuliaLang/julia/blob/112554e1a533cebad4cb0daa27df59636405c075/base/div.jl#L217
77+
@inbounds r = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_Rs) # we must not yield here!
78+
@inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
79+
Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
80+
Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
81+
Base.GMP.MPZ.tdiv_qr!(x, r, x, y) # x, r = divrem(x, y)
82+
Base.GMP.MPZ.tdiv_q!(y, _BIGINT2) # y = div(y, 2)
83+
iseven(x) && Base.GMP.MPZ.add!(y, _BIGINT1) # y = y + iseven(x)
84+
if r >= y
85+
Base.GMP.MPZ.add!(x, _BIGINT1) # x = x + (r >= y)
86+
end
87+
return x, code
88+
end
89+
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:ToZero})
90+
@inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
91+
Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
92+
Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
93+
Base.GMP.MPZ.tdiv_q!(x, y) # x = div(x, y)
94+
return x, code
95+
end
96+
97+
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:Throw})
98+
@inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
99+
Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
100+
Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
101+
Base.GMP.MPZ.tdiv_qr!(x, y, x, y) # x, y = divrem(x, y)
102+
y == 0 || (code |= Parsers.INEXACT)
103+
return x, code
104+
end
105+
106+
# Rescale the digits we accumulated so far into an integer representing the decimal
107+
# Note the 2nd argument `FloatType` is used by Parsers.jl for _float_ parsing, but we can ignore in the fixed decimal case
108+
# Turn the accumulated `digits` and effective exponent `exp` into the backing integer of type `T`
# for a fixed decimal with `conf.f` fractional digits.
#
# Arguments used here:
# - `digits`: the non-negative integer formed by the parsed digits
# - `exp`: the decimal exponent to apply to `digits`
# - `neg`: whether a leading minus sign was seen
# - `code`: the return code accumulated so far; INEXACT, INVALID or OVERFLOW may be OR-ed in
# - `ndigits`: the number of digits accumulated in `digits`
# - `options.rounding`: the rounding mode; `nothing` is treated as `RoundThrows`
# The `FloatType` argument and `f` are not used by this method.
#
# Returns `(out, code)` where `out::T` is the (possibly negated) backing integer. When an error
# flag is set in `code`, `out` is not meaningful.
@inline function Parsers.scale(
    conf::FixedDecimalConf{T}, ::Parsers.FloatType, digits::V, exp, neg, code, ndigits, f::F, options::Parsers.Options
) where {T,V,F}
    rounding = something(options.rounding, RoundThrows)
    # Positive: how many trailing zeroes we need to add to our integer
    # Negative: how many digits are past our precision (we need to handle them in rounding)
    decimal_shift = conf.f + exp
    # Number of digits we need to accumulate including any trailing zeros or digits past our precision
    backing_integer_digits = ndigits + decimal_shift
    may_overflow = backing_integer_digits == _maxintdigits(T)
    if iszero(ndigits)
        # all digits are zero
        i = zero(T)
    # The backing_integer_digits == 0 case is handled in the `else` (it means
    # that all the digits are past the precision but we might get `1` from rounding)
    elseif backing_integer_digits < 0
        # All digits are past our precision, no overflow possible, but we might get an inexact
        i = zero(T)
        (rounding === RoundThrows) && (code |= Parsers.INEXACT)
    elseif neg && (T <: Unsigned)
        # Unsigned types can't represent negative numbers
        i = _unsafe_convert_int(T, digits)
        code |= Parsers.INVALID
    elseif backing_integer_digits > _maxintdigits(T)
        i = _unsafe_convert_int(T, digits)
        # The number of digits to accumulate is larger than the capacity of T, we overflow
        # We don't check for inexact here because we already have an error
        code |= Parsers.OVERFLOW
    else
        if decimal_shift > 0
            r = _unsafe_convert_int(T, digits)
            # In this branch `decimal_shift < _maxintdigits(T)`, so the power of ten itself
            # always fits in `T`; only the product can overflow.
            # Comparing the wrapped product against `r` is not a reliable overflow test
            # (e.g. `Int8(3) * Int8(100)` wraps to `44`, which is still greater than `3`),
            # so use checked multiplication instead.
            i, overflowed = Base.Checked.mul_with_overflow(r, _shift(one(T), decimal_shift))
            overflowed && (code |= Parsers.OVERFLOW)
        elseif decimal_shift < 0
            if rounding === RoundNearest
                r, code = _divpow10!(digits, code, -decimal_shift, RoundNearest)
            elseif rounding === RoundToZero
                r, code = _divpow10!(digits, code, -decimal_shift, RoundToZero)
            else
                r, code = _divpow10!(digits, code, -decimal_shift, RoundThrows)
            end
            # Now that the digits were rescaled we can check for overflow
            # can happen e.g. if digits were unsigned ints and our type is signed
            may_overflow && _check_overflows(T, r, neg) && (code |= Parsers.OVERFLOW)
            i = _unsafe_convert_int(T, r)
        else
            may_overflow && _check_overflows(T, digits, neg) && (code |= Parsers.OVERFLOW)
            i = _unsafe_convert_int(T, digits)
        end
    end
    out = ifelse(neg, -i, i)
    return (out, code)
end
161+
162+
# If we only saw integer digits and not fractional or exponent digits, we just call scale with exp of 0
163+
# To handle type conversions and overflow checks etc.
164+
@inline function Parsers.noscale(conf::FixedDecimalConf{T}, digits::Integer, neg::Bool, code, ndigits, f::F, options::Parsers.Options) where {T,F}
165+
FT = Parsers.FLOAT64 # not used by FixedDecimal parser
166+
exp = 0
167+
return Parsers.scale(conf, FT, digits, exp, neg, code, ndigits, f, options)
168+
end
169+
170+
# This hooks into the floating point parsing machinery from Parsers.jl, where we also accumulate
171+
# all the digits and note the effective exponent before we do "scaling" -- for FixedDecimals,
172+
# the scaling means padding the backing integer with zeros or rounding them as necessary.
173+
# We overloaded the "scale" and "noscale" methods to produce backing integers for FixedDecimals.
174+
# We return a value of T -- i.e. the _integer_ backing the FixedDecimal; the `reinterpret` needs to happen later
175+
@inline function Parsers.typeparser(conf::FixedDecimalConf{T}, source, pos, len, b, code, pl, options) where {T<:Integer}
176+
if !(options.rounding in (nothing, RoundNearest, RoundToZero, RoundThrows))
177+
throw(ArgumentError("Unhandled rounding mode $(options.rounding)"))
178+
end
179+
180+
startpos = pos
181+
# begin parsing
182+
neg = b == UInt8('-')
183+
if neg || b == UInt8('+')
184+
pos += 1
185+
Parsers.incr!(source)
186+
if Parsers.eof(source, pos, len)
187+
code |= Parsers.INVALID | Parsers.EOF
188+
x = zero(T)
189+
@goto done
190+
end
191+
b = Parsers.peekbyte(source, pos)
192+
else
193+
# Check if the input is empty
194+
if Parsers.eof(source, pos, len)
195+
code |= Parsers.INVALID | Parsers.EOF
196+
x = zero(T)
197+
@goto done
198+
end
199+
end
200+
201+
if (b - UInt8('0')) <= 0x09 || b == options.decimal
202+
x, code, pos = Parsers.parsedigits(conf, source, pos, len, b, code, options, UInt64(0), neg, startpos, true, 0, nothing)
203+
else
204+
x = zero(T)
205+
code |= Parsers.INVALID
206+
end
207+
@label done
208+
return pos, code, Parsers.PosLen(pl.pos, pos - pl.pos), x
209+
end
210+
211+
function _base_parse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
212+
if !(mode in (RoundThrows, RoundNearest, RoundToZero))
213+
throw(ArgumentError("Unhandled rounding mode $mode"))
214+
end
215+
216+
bytes = codeunits(source)
217+
options = mode === RoundNearest ? OPTIONS_ROUND_NEAREST :
218+
mode === RoundToZero ? OPTIONS_ROUND_TO_ZERO :
219+
OPTIONS_ROUND_THROWS
220+
res = Parsers.xparse2(FD{T, f}, bytes, 1, length(bytes), options)
221+
return res
222+
end
223+
224+
function Base.tryparse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
225+
isempty(source) && return nothing
226+
res = _base_parse(FD{T, f}, source, mode)
227+
# If we didn't reach eof, there was some garbage at the end of the string after something that looked like a number
228+
return (Parsers.eof(res.code) && Parsers.ok(res.code)) ? res.val : nothing
229+
end
230+
231+
function Base.parse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
232+
isempty(source) && throw(ArgumentError("Empty input is not allowed"))
233+
res = _base_parse(FD{T, f}, source, mode)
234+
Parsers.inexact(res.code) && throw(InexactError(:parse, FD{T, f}, source))
235+
Parsers.overflow(res.code) && throw(OverflowError("overflow parsing $(repr(source)) as $(FD{T, f})"))
236+
# If we didn't reach eof, there was some garbage at the end of the string after something that looked like a number
237+
(!Parsers.eof(res.code) || Parsers.invalid(res.code)) && throw(ArgumentError("cannot parse $(repr(source)) as $(FD{T, f})"))
238+
return res.val
239+
end

0 commit comments

Comments
 (0)