Skip to content

Commit a780608

Browse files
committed
Add ability to ignore repeated delimiters
1 parent 90d73ed commit a780608

File tree

2 files changed

+42
-6
lines changed

2 files changed

+42
-6
lines changed

src/Parsers.jl

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -283,30 +283,47 @@ end
283283
Parsing on a `Parsers.Delimited` will first call `Parsers.parse!(d.next, io, result; kwargs...)`, then expect the next bytes to be one of the expected `delims` arguments.
284284
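If none of the `delims` is found at that position, the result is marked `Parsers.INVALID` (the `INVALID_DELIMITER` code is set on the result), but parsing will continue until a valid delimiter is found. An `eof(io)` is _always_ considered a valid termination state in place of a delimiter. When the `Delimited` is constructed with `ignore_repeated=true`, a run of consecutive delimiters is consumed as if it were a single delimiter.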
285285
"""
286-
struct Delimited{I, T <: Trie} <: Layer
286+
struct Delimited{IR, I, T <: Trie} <: Layer
287287
next::I
288288
delims::T
289289
end
290-
Delimited(next, delims::Union{Char, String}...=',') = Delimited(next, Trie(String[string(d) for d in delims], DELIMITED))
291-
Delimited(delims::Union{Char, String}...=',') = Delimited(defaultparser, Trie(String[string(d) for d in delims], DELIMITED))
290+
Delimited(ignore_repeated::Bool, next::I, delims::T) where {I, T <: Trie} = Delimited{ignore_repeated, I, T}(next, delims)
291+
Delimited(next::Union{Layer, Base.Callable}=defaultparser, delims::Union{Char, String}...; ignore_repeated::Bool=false) = Delimited(ignore_repeated, next, Trie(String[string(d) for d in (isempty(delims) ? (",",) : delims)], DELIMITED))
292+
Delimited(delims::Union{Char, String}...; ignore_repeated::Bool=false) = Delimited(ignore_repeated, defaultparser, Trie(String[string(d) for d in (isempty(delims) ? (",",) : delims)], DELIMITED))
292293

293-
@inline function parse!(d::Delimited, io::IO, r::Result{T}; kwargs...) where {T}
294+
@inline function parse!(d::Delimited{ignore_repeated}, io::IO, r::Result{T}; kwargs...) where {ignore_repeated, T}
294295
# @debug "xparse Delimited - $T"
295296
parse!(d.next, io, r; kwargs...)
296297
# @debug "Delimited - $T: r.code=$(r.code), r.result=$(r.result)"
297298
if eof(io)
298299
r.code |= EOF
299300
return r
300301
end
301-
match!(d.delims, io, r, false) && return r
302+
if ignore_repeated
303+
matched = false
304+
while match!(d.delims, io, r, false)
305+
matched = true
306+
end
307+
matched && return r
308+
else
309+
match!(d.delims, io, r, false) && return r
310+
end
302311
# @debug "didn't find delimiters at expected location; result is invalid, parsing until delimiter is found"
303312
while true
304313
b = readbyte(io)
305314
if eof(io)
306315
r.code |= EOF
307316
break
308317
end
309-
match!(d.delims, io, r, false) && break
318+
if ignore_repeated
319+
matched = false
320+
while match!(d.delims, io, r, false)
321+
matched = true
322+
end
323+
matched && break
324+
else
325+
match!(d.delims, io, r, false) && break
326+
end
310327
end
311328
r.code |= INVALID_DELIMITER
312329
return r

test/runtests.jl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,25 @@ let io=IOBuffer("1,2,null,4"), layers=Parsers.Delimited(Parsers.Quoted(Parsers.S
557557
@test r.pos == 9
558558
end
559559

560+
let io=IOBuffer("1,,,2,null,4"), layers=Parsers.Delimited(Parsers.Quoted(Parsers.Sentinel(["null"])); ignore_repeated=true)
561+
r = Parsers.parse(layers, io, Int)
562+
@test r.result === 1
563+
@test r.code === OK | DELIMITED
564+
@test r.pos == 0
565+
r = Parsers.parse(layers, io, Int)
566+
@test r.result === 2
567+
@test r.code === OK | DELIMITED
568+
@test r.pos == 4
569+
r = Parsers.parse(layers, io, Int)
570+
@test r.result === missing
571+
@test r.code === SENTINEL | DELIMITED
572+
@test r.pos == 6
573+
r = Parsers.parse(layers, io, Int)
574+
@test r.result === 4
575+
@test r.code === OK | EOF
576+
@test r.pos == 11
577+
end
578+
560579
end # @testset
561580

562581
end # @testset

0 commit comments

Comments
 (0)