Skip to content

Commit 7e94dc5

Browse files
committed
optimized numeric parsing
1 parent 1fe098f commit 7e94dc5

File tree

4 files changed

+135
-1
lines changed

4 files changed

+135
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,5 @@ julia> abc = StringView(0x61:0x63) # and for other array types
3737
```
3838

3939
Other optimized (copy-free) operations include I/O, hashing, iteration/indexing,
40-
comparisons, and validation. Working with a `SubString` of a `StringView` is
40+
comparisons, parsing, and validation. Working with a `SubString` of a `StringView` is
4141
similarly efficient.

src/StringViews.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,5 +113,6 @@ end
113113

114114
include("decoding.jl")
115115
include("regex.jl")
116+
include("parse.jl")
116117

117118
end # module

src/parse.jl

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# optimized parsing functions, copied from julia/base/parse.jl
2+
3+
import Base: tryparse, tryparse_internal
4+
5+
# Try to parse the whole of `s` as a `Float64`.
#
# `s` is handed directly to the runtime routine `jl_try_substrtod` together with
# a byte offset of 0 and a byte length of `sizeof(s)`.
#
# Returns the parsed value when the routine reports success, otherwise `nothing`.
function tryparse(::Type{Float64}, s::DenseStringViewAndSub)
    nbytes = sizeof(s)
    ok, parsed = ccall(:jl_try_substrtod, Tuple{Bool, Float64},
                       (Ptr{UInt8}, Csize_t, Csize_t), s, 0, nbytes)
    if ok
        return parsed
    end
    return nothing
end
10+
# Try to parse the bytes `startpos:endpos` of `s` as a `Float64`.
#
# The 1-based inclusive range is translated into the (0-based offset, length)
# pair that the runtime routine `jl_try_substrtod` takes.
#
# Returns the parsed value when the routine reports success, otherwise `nothing`.
function tryparse_internal(::Type{Float64}, s::DenseStringViewAndSub, startpos::Int, endpos::Int)
    offset = startpos - 1
    nbytes = endpos - startpos + 1
    ok, parsed = ccall(:jl_try_substrtod, Tuple{Bool, Float64},
                       (Ptr{UInt8}, Csize_t, Csize_t), s, offset, nbytes)
    if ok
        return parsed
    end
    return nothing
end
15+
# Try to parse the whole of `s` as a `Float32`.
#
# `s` is handed directly to the runtime routine `jl_try_substrtof` together with
# a byte offset of 0 and a byte length of `sizeof(s)`.
#
# Returns the parsed value when the routine reports success, otherwise `nothing`.
function tryparse(::Type{Float32}, s::DenseStringViewAndSub)
    nbytes = sizeof(s)
    ok, parsed = ccall(:jl_try_substrtof, Tuple{Bool, Float32},
                       (Ptr{UInt8}, Csize_t, Csize_t), s, 0, nbytes)
    if ok
        return parsed
    end
    return nothing
end
20+
# Try to parse the bytes `startpos:endpos` of `s` as a `Float32`.
#
# The 1-based inclusive range is translated into the (0-based offset, length)
# pair that the runtime routine `jl_try_substrtof` takes.
#
# Returns the parsed value when the routine reports success, otherwise `nothing`.
function tryparse_internal(::Type{Float32}, s::DenseStringViewAndSub, startpos::Int, endpos::Int)
    offset = startpos - 1
    nbytes = endpos - startpos + 1
    ok, parsed = ccall(:jl_try_substrtof, Tuple{Bool, Float32},
                       (Ptr{UInt8}, Csize_t, Csize_t), s, offset, nbytes)
    if ok
        return parsed
    end
    return nothing
end
25+
26+
# Try to parse the indices `i:e` of `s` as a `Complex{T}`.
#
# Accepted shapes are a purely real value, a purely imaginary value with a
# trailing `im`/`i`/`j`, or `real ± imag` followed by `im`/`i`/`j`. The real and
# imaginary components are parsed by the `tryparse_internal` method for `T`.
#
# Arguments:
# - `s`:     the string (view) to parse
# - `i`, `e`: first and last index of the region to parse
# - `raise`: when `true`, throw an `ArgumentError` on malformed input instead of
#            returning `nothing`
#
# Returns the parsed `Complex{T}`, or `nothing` when parsing fails and `raise`
# is `false`.
function tryparse_internal(::Type{Complex{T}}, s::DenseStringViewAndSub,
                           i::Int, e::Int, raise::Bool) where {T<:Real}
    # skip initial whitespace (bounds are tested before indexing)
    while i <= e && isspace(s[i])
        i = nextind(s, i)
    end
    if i > e
        raise && throw(ArgumentError("input string is empty or only contains whitespace"))
        return nothing
    end

    # find index of ± separating real/imaginary parts (if any)
    i₊ = something(findnext(in(('+','-')), s, i), 0)
    if i₊ == i # leading ± sign
        i₊ = something(findnext(in(('+','-')), s, i₊+1), 0)
    end
    if i₊ != 0 && s[i₊-1] in ('e','E') # exponent sign
        i₊ = something(findnext(in(('+','-')), s, i₊+1), 0)
    end

    # find trailing im/i/j
    iᵢ = something(findprev(in(('m','i','j')), s, e), 0)
    if iᵢ > 0 && s[iᵢ] == 'm' # im
        iᵢ -= 1
        # `iᵢ < i` guards against indexing before the parsed region (e.g. the
        # input "m"), which would otherwise raise a BoundsError instead of
        # reporting a parse failure.
        if iᵢ < i || s[iᵢ] != 'i'
            raise && throw(ArgumentError("expected trailing \"im\", found only \"m\""))
            return nothing
        end
    end

    if i₊ == 0 # purely real or imaginary value
        if iᵢ > i && !(iᵢ == i+1 && s[i] in ('+','-')) # purely imaginary (not "±inf")
            x = tryparse_internal(T, s, i, iᵢ-1, raise)
            x === nothing && return nothing
            return Complex{T}(zero(x), x)
        else # purely real
            x = tryparse_internal(T, s, i, e, raise)
            x === nothing && return nothing
            return Complex{T}(x)
        end
    end

    if iᵢ < i₊
        raise && throw(ArgumentError("missing imaginary unit"))
        return nothing # no imaginary part
    end

    # parse real part
    re = tryparse_internal(T, s, i, i₊-1, raise)
    re === nothing && return nothing

    # parse imaginary part
    im = tryparse_internal(T, s, i₊+1, iᵢ-1, raise)
    im === nothing && return nothing

    return Complex{T}(re, s[i₊]=='-' ? -im : im)
end
82+
83+
# Try to parse the indices `startpos:endpos` of `sbuff` as a `Bool`.
#
# Input whose first character is numeric is parsed as a `UInt8` in the given
# `base`, and the values 1 and 0 map to `true` and `false`. Otherwise leading
# and trailing whitespace is skipped and the remaining bytes are compared
# against the literals "true" and "false".
#
# Arguments:
# - `sbuff`:              the string (view) to parse
# - `startpos`, `endpos`: first and last index of the region to parse
# - `base`:               integer base forwarded to the `UInt8` parser
# - `raise`:              when `true`, throw an `ArgumentError` on invalid input
#                         instead of returning `nothing`
#
# Returns `true`, `false`, or `nothing` (invalid input with `raise == false`).
function tryparse_internal(::Type{Bool}, sbuff::DenseStringViewAndSub,
                           startpos::Int, endpos::Int, base::Integer, raise::Bool)
    if isempty(sbuff)
        raise && throw(ArgumentError("input string is empty"))
        return nothing
    end

    if isnumeric(sbuff[1])
        intres = tryparse_internal(UInt8, sbuff, startpos, endpos, base, false)
        (intres == 1) && return true
        (intres == 0) && return false
        raise && throw(ArgumentError("invalid Bool representation: $(repr(sbuff))"))
    end

    orig_start = startpos
    orig_end = endpos

    # Ignore leading and trailing whitespace. The bounds test comes first so
    # that an all-whitespace input never indexes past the ends of `sbuff`
    # (which would throw a BoundsError and make the whitespace-only error
    # below unreachable).
    while startpos <= endpos && isspace(sbuff[startpos])
        startpos = nextind(sbuff, startpos)
    end
    while endpos >= startpos && isspace(sbuff[endpos])
        endpos = prevind(sbuff, endpos)
    end

    len = endpos - startpos + 1
    GC.@preserve sbuff begin
        # take the raw pointer only while `sbuff` is protected from collection
        p = pointer(sbuff) + startpos - 1
        (len == 4) && (0 == Base._memcmp(p, "true", 4)) && (return true)
        (len == 5) && (0 == Base._memcmp(p, "false", 5)) && (return false)
    end

    if raise
        # show input string in the error to avoid confusion
        substr = SubString(sbuff, orig_start, orig_end)
        if all(isspace, substr)
            throw(ArgumentError("input string only contains whitespace"))
        else
            throw(ArgumentError("invalid Bool representation: $(repr(substr))"))
        end
    end
    return nothing
end

test/runtests.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,15 @@ end
8080
@test findnext(r"[aeiou]+", ss, 1) == 1:2
8181
end
8282

83+
# Round-trip a value of each supported type through `string` and `parse`, for a
# `StringView`, a `SubString` of a `String`, and a `SubString` of a `StringView`
# (the last one is what exercises the `SubString`-of-`StringView` methods).
@testset "parsing" begin
    for val in (true, 1234, 1234.5, 1234.5f0, 4.5+3.25im)
        sval = string(val)
        padded = "foo" * sval * "bar"
        # `sval` is ASCII for these values, so character counts are valid indices
        lo, hi = 4, 3 + length(sval)
        for str in (StringView(sval),
                    SubString(padded, lo, hi),
                    SubString(StringView(padded), lo, hi))
            @test parse(typeof(val), str) === val
        end
    end
end
91+
8392
@testset "miscellaneous" begin
8493
@test cmp("foobar","bar") == cmp(s,"bar") == -cmp("bar",s) == cmp(s,StringView("bar"))
8594
@test s == StringView("foobar") == "foobar" == s == "foobar" != StringView("bar")

0 commit comments

Comments
 (0)