@@ -7,6 +7,13 @@ include("pcre.jl")
7
7
# Bitwise OR (`|`) of four option flags taken from the `PCRE` module.
# NOTE(review): the name suggests these are the default pattern-compilation options —
# confirm against the `Regex` constructor, which is not visible here.
const DEFAULT_COMPILER_OPTS = PCRE. UTF | PCRE. NO_UTF_CHECK | PCRE. ALT_BSUX | PCRE. UCP
8
8
# A single `PCRE` option flag.
# NOTE(review): the name suggests this is the default set of match-time options — confirm
# against the `Regex` constructor, which is not visible here.
const DEFAULT_MATCH_OPTS = PCRE. NO_UTF_CHECK
9
9
10
+ """
+     AbstractPattern
+
11
+ An abstract type representing any sort of pattern matching expression (typically a regular
12
+ expression).
13
+ `AbstractPattern` objects can be used to match strings with [`match`](@ref).
+
+ [`Regex`](@ref) is a concrete subtype of `AbstractPattern`.
14
+ """
15
+ abstract type AbstractPattern end
16
+
10
17
"""
11
18
Regex(pattern[, flags])
12
19
@@ -17,7 +24,7 @@ with [`match`](@ref).
17
24
`Regex(pattern[, flags])` constructor is usually used if the `pattern` string needs
18
25
to be interpolated. See the documentation of the string macro for details on flags.
19
26
"""
20
- mutable struct Regex
27
+ mutable struct Regex <: AbstractPattern
21
28
pattern:: String
22
29
compile_options:: UInt32
23
30
match_options:: UInt32
@@ -128,10 +135,16 @@ function show(io::IO, re::Regex)
128
135
end
129
136
end
130
137
138
+ """
+     AbstractMatch
+
139
+ `AbstractMatch` objects are used to represent information about matches found in a string
140
+ using an `AbstractPattern`.
+
+ `RegexMatch` is a concrete subtype of `AbstractMatch`.
141
+ """
142
+ abstract type AbstractMatch end
143
+
131
144
# TODO: map offsets into strings in other encodings back to original indices.
132
145
# Alternatively, it may be better to just fail, since that mapping would be quite slow.
133
146
134
- struct RegexMatch
147
+ struct RegexMatch <: AbstractMatch
135
148
match:: SubString{String}
136
149
captures:: Vector{Union{Nothing,SubString{String}}}
137
150
offset:: Int
278
291
"""
279
292
function match end
280
293
281
- function match (re:: Regex , str:: Union{SubString{String}, String} , idx:: Integer , add_opts:: UInt32 = UInt32 (0 ))
294
+ function match (re:: Regex , str:: Union{SubString{String}, String} , idx:: Integer ,
295
+ add_opts:: UInt32 = UInt32 (0 ))
282
296
compile (re)
283
297
opts = re. match_options | add_opts
284
298
matched, data = PCRE. exec_r_data (re. regex, str, idx- 1 , opts)
@@ -336,7 +350,7 @@ findfirst(r::Regex, s::AbstractString) = findnext(r,s,firstindex(s))
336
350
337
351
"""
338
352
findall(
339
- pattern::Union{AbstractString,Regex },
353
+ pattern::Union{AbstractString,AbstractPattern },
340
354
string::AbstractString;
341
355
overlap::Bool = false,
342
356
)
@@ -365,7 +379,7 @@ julia> findall("a", "banana")
365
379
6:6
366
380
```
367
381
"""
368
- function findall (t:: Union{AbstractString,Regex } , s:: AbstractString ; overlap:: Bool = false )
382
+ function findall (t:: Union{AbstractString,AbstractPattern } , s:: AbstractString ; overlap:: Bool = false )
369
383
found = UnitRange{Int}[]
370
384
i, e = firstindex (s), lastindex (s)
371
385
while true
381
395
382
396
"""
383
397
count(
384
- pattern::Union{AbstractString,Regex },
398
+ pattern::Union{AbstractString,AbstractPattern },
385
399
string::AbstractString;
386
400
overlap::Bool = false,
387
401
)
@@ -392,7 +406,7 @@ calling `length(findall(pattern, string))` but more efficient.
392
406
If `overlap=true`, the matching sequences are allowed to overlap indices in the
393
407
original string, otherwise they must be from disjoint character ranges.
394
408
"""
395
- function count (t:: Union{AbstractString,Regex } , s:: AbstractString ; overlap:: Bool = false )
409
+ function count (t:: Union{AbstractString,AbstractPattern } , s:: AbstractString ; overlap:: Bool = false )
396
410
n = 0
397
411
i, e = firstindex (s), lastindex (s)
398
412
while true
0 commit comments