Skip to content

Commit 5faa51b

Browse files
Add AbstractPattern and AbstractMatch to allow for more general pattern matching (#38108)
Co-authored-by: ScottPJones <scottjones@alum.mit.edu>
1 parent 1994981 commit 5faa51b

File tree

5 files changed

+28
-12
lines changed

5 files changed

+28
-12
lines changed

base/broadcast.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ julia> Broadcast.broadcastable("hello") # Strings break convention of matching i
675675
Base.RefValue{String}("hello")
676676
```
677677
"""
678-
broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,Regex,Pair}) = Ref(x)
678+
broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair}) = Ref(x)
679679
broadcastable(::Type{T}) where {T} = Ref{Type{T}}(T)
680680
broadcastable(x::Union{AbstractArray,Number,Ref,Tuple,Broadcasted}) = x
681681
# Default to collecting iterables — which will error for non-iterables

base/exports.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ export
2222
AbstractVector,
2323
AbstractVecOrMat,
2424
Array,
25+
AbstractMatch,
26+
AbstractPattern,
2527
AbstractDict,
2628
BigFloat,
2729
BigInt,

base/regex.jl

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ include("pcre.jl")
77
const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.NO_UTF_CHECK | PCRE.ALT_BSUX | PCRE.UCP
88
const DEFAULT_MATCH_OPTS = PCRE.NO_UTF_CHECK
99

10+
"""
11+
An abstract type representing any sort of pattern matching expression (typically a regular
12+
expression).
13+
`AbstractPattern` objects can be used to match strings with [`match`](@ref).
14+
"""
15+
abstract type AbstractPattern end
16+
1017
"""
1118
Regex(pattern[, flags])
1219
@@ -17,7 +24,7 @@ with [`match`](@ref).
1724
`Regex(pattern[, flags])` constructor is usually used if the `pattern` string needs
1825
to be interpolated. See the documentation of the string macro for details on flags.
1926
"""
20-
mutable struct Regex
27+
mutable struct Regex <: AbstractPattern
2128
pattern::String
2229
compile_options::UInt32
2330
match_options::UInt32
@@ -128,10 +135,16 @@ function show(io::IO, re::Regex)
128135
end
129136
end
130137

138+
"""
139+
`AbstractMatch` objects are used to represent information about matches found in a string
140+
using an `AbstractPattern`.
141+
"""
142+
abstract type AbstractMatch end
143+
131144
# TODO: map offsets into strings in other encodings back to original indices.
132145
# or maybe it's better to just fail since that would be quite slow
133146

134-
struct RegexMatch
147+
struct RegexMatch <: AbstractMatch
135148
match::SubString{String}
136149
captures::Vector{Union{Nothing,SubString{String}}}
137150
offset::Int
@@ -278,7 +291,8 @@ true
278291
"""
279292
function match end
280293

281-
function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, add_opts::UInt32=UInt32(0))
294+
function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
295+
add_opts::UInt32=UInt32(0))
282296
compile(re)
283297
opts = re.match_options | add_opts
284298
matched, data = PCRE.exec_r_data(re.regex, str, idx-1, opts)
@@ -336,7 +350,7 @@ findfirst(r::Regex, s::AbstractString) = findnext(r,s,firstindex(s))
336350

337351
"""
338352
findall(
339-
pattern::Union{AbstractString,Regex},
353+
pattern::Union{AbstractString,AbstractPattern},
340354
string::AbstractString;
341355
overlap::Bool = false,
342356
)
@@ -365,7 +379,7 @@ julia> findall("a", "banana")
365379
6:6
366380
```
367381
"""
368-
function findall(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false)
382+
function findall(t::Union{AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
369383
found = UnitRange{Int}[]
370384
i, e = firstindex(s), lastindex(s)
371385
while true
@@ -381,7 +395,7 @@ end
381395

382396
"""
383397
count(
384-
pattern::Union{AbstractString,Regex},
398+
pattern::Union{AbstractString,AbstractPattern},
385399
string::AbstractString;
386400
overlap::Bool = false,
387401
)
@@ -392,7 +406,7 @@ calling `length(findall(pattern, string))` but more efficient.
392406
If `overlap=true`, the matching sequences are allowed to overlap indices in the
393407
original string, otherwise they must be from disjoint character ranges.
394408
"""
395-
function count(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false)
409+
function count(t::Union{AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
396410
n = 0
397411
i, e = firstindex(s), lastindex(s)
398412
while true

base/strings/search.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ end
8888

8989
"""
9090
findfirst(pattern::AbstractString, string::AbstractString)
91-
findfirst(pattern::Regex, string::String)
91+
findfirst(pattern::AbstractPattern, string::String)
9292
9393
Find the first occurrence of `pattern` in `string`. Equivalent to
9494
[`findnext(pattern, string, firstindex(string))`](@ref).
@@ -250,7 +250,7 @@ end
250250

251251
"""
252252
findnext(pattern::AbstractString, string::AbstractString, start::Integer)
253-
findnext(pattern::Regex, string::String, start::Integer)
253+
findnext(pattern::AbstractPattern, string::String, start::Integer)
254254
255255
Find the next occurrence of `pattern` in `string` starting at position `start`.
256256
`pattern` can be either a string, or a regular expression, in which case `string`
@@ -507,7 +507,7 @@ findprev(ch::AbstractChar, string::AbstractString, ind::Integer) =
507507
findprev(==(ch), string, ind)
508508

509509
"""
510-
occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString)
510+
occursin(needle::Union{AbstractString,AbstractPattern,AbstractChar}, haystack::AbstractString)
511511
512512
Determine whether the first argument is a substring of the second. If `needle`
513513
is a regular expression, checks whether `haystack` contains a match.

base/strings/util.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ If `count` is provided, replace at most `count` occurrences.
546546
`pat` may be a single character, a vector or a set of characters, a string,
547547
or a regular expression.
548548
If `r` is a function, each occurrence is replaced with `r(s)`
549-
where `s` is the matched substring (when `pat` is a `Regex` or `AbstractString`) or
549+
where `s` is the matched substring (when `pat` is an `AbstractPattern` or `AbstractString`) or
550550
character (when `pat` is an `AbstractChar` or a collection of `AbstractChar`).
551551
If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then capture group
552552
references in `r` are replaced with the corresponding matched text.

0 commit comments

Comments
 (0)