@@ -10,7 +10,29 @@ match strings with [`match`](@ref).
10
10
"""
11
11
abstract type AbstractPattern end
12
12
13
- nothing_sentinel (i) = i == 0 ? nothing : i
13
# TODO: The types collected in these unions stand for bytes in memory that can be
# reached through a pointer. That property is relied upon throughout Julia, for
# example in IO code as well. A better solution is warranted - see #53178.
# Once such a solution exists, these unions should be replaced by it.
const DenseInt8 = Union{
    DenseArray{Int8},
    (FastContiguousSubArray{Int8,N,P} where {N,P<:DenseArray}),
}
21
+
22
# Note: Unlike the Int8 union, this one lists CodeUnits explicitly.
# At present that is redundant because CodeUnits <: DenseVector, but that subtype
# relation is buggy and may be removed in the future, see #54002
const DenseUInt8 = Union{
    DenseArray{UInt8},
    (FastContiguousSubArray{UInt8,N,P} where {N,P<:DenseArray}),
    (CodeUnits{UInt8,S} where {S<:Union{String,SubString{String}}}),
    (FastContiguousSubArray{UInt8,N,P} where
        {N,P<:CodeUnits{UInt8,<:Union{String,SubString{String}}}}),
}
31
+
32
# Either of the two byte-array unions: signed or unsigned 8-bit elements.
const DenseUInt8OrInt8 = Union{DenseInt8,DenseUInt8}
33
+
34
# Last index usable for a byte search: `lastindex` for byte arrays,
# `ncodeunits` for `String` and `SubString{String}`.
function last_byteindex(x::DenseUInt8OrInt8)
    return lastindex(x)
end

function last_byteindex(x::Union{String,SubString{String}})
    return ncodeunits(x)
end
14
36
15
37
function last_utf8_byte (c:: Char )
16
38
u = reinterpret (UInt32, c)
@@ -30,11 +52,11 @@ function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar}
30
52
end
31
53
@inbounds isvalid (s, i) || string_index_err (s, i)
32
54
c = pred. x
33
- c ≤ ' \x 7f' && return nothing_sentinel ( _search (s, c % UInt8 , i) )
55
+ c ≤ ' \x 7f' && return _search (s, first_utf8_byte (c) , i)
34
56
while true
35
57
i = _search (s, first_utf8_byte (c), i)
36
- i == 0 && return nothing
37
- pred (s[i]) && return i
58
+ i === nothing && return nothing
59
+ isvalid (s, i) && pred (s[i]) && return i
38
60
i = nextind (s, i)
39
61
end
40
62
end
@@ -47,31 +69,41 @@ const DenseBytes = Union{
47
69
CodeUnits{UInt8, <: Union{String, SubString{String}} },
48
70
}
49
71
50
- const ByteArray = Union{DenseBytes, DenseArrayType{Int8}}
72
# Equality search for a byte from the start of `a`: forwards to the three-argument
# `findnext`, beginning at `firstindex(a)`.
findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{UInt8, Int8}}, a::Union{DenseInt8, DenseUInt8}) =
    findnext(pred, a, firstindex(a))
51
75
52
- findfirst (pred:: Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}} , a:: ByteArray ) =
53
- nothing_sentinel (_search (a, pred. x))
76
# Equality search for a `UInt8` in an unsigned byte array, starting at index `i`.
# The work is done by `_search`, whose result is returned unchanged.
findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer) =
    _search(a, pred.x, i)
54
79
55
- findnext (pred:: Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}} , a:: ByteArray , i:: Integer ) =
56
- nothing_sentinel (_search (a, pred. x, i))
80
# Equality search for an `Int8` in a signed byte array, starting at index `i`.
# The work is done by `_search`, whose result is returned unchanged.
findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer) =
    _search(a, pred.x, i)
57
83
58
- findfirst (:: typeof (iszero), a:: ByteArray ) = nothing_sentinel (_search (a, zero (UInt8)))
59
- findnext (:: typeof (iszero), a:: ByteArray , i:: Integer ) = nothing_sentinel (_search (a, zero (UInt8), i))
84
# `iszero` gets its own methods: zero has the same bit pattern for Int8 and UInt8,
# so memchr can be used even when the element type of the array is the other of
# the two byte types.
function findfirst(::typeof(iszero), a::DenseUInt8OrInt8)
    return _search(a, zero(UInt8))
end

function findnext(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer)
    return _search(a, zero(UInt8), i)
end
60
88
61
- function _search (a:: Union{String,SubString{String},<:ByteArray} , b:: Union{Int8,UInt8} , i:: Integer = 1 )
62
- if i < 1
89
# Forward byte search via C `memchr`.
#
# Looks for byte `b` in `a`, starting at index `i` (default `firstindex(a)`), and
# returns the index of the first match or `nothing` when there is none.
# Throws `BoundsError` when `i` is below `firstindex(a)` or more than one past the
# last byte index; `i` exactly one past the end yields `nothing`.
function _search(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = firstindex(a))
    first_idx = firstindex(a)
    last_idx = last_byteindex(a)
    i < first_idx && throw(BoundsError(a, i))
    if i > last_idx
        i == last_idx + 1 || throw(BoundsError(a, i))
        return nothing
    end
    # Number of bytes from position `i` through the end of `a`.
    remaining = last_idx - i + 1
    # Keep `a` rooted while its raw pointer is handed to C.
    GC.@preserve a begin
        base = pointer(a)
        hit = ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t),
                    base + i - first_idx, b, remaining)
    end
    hit == C_NULL && return nothing
    # Convert the pointer offset back into an index relative to `firstindex(a)`.
    return (hit - base + first_idx) % Int
end
73
105
74
- function _search (a:: ByteArray , b:: AbstractChar , i:: Integer = 1 )
106
+ function _search (a:: DenseUInt8 , b:: AbstractChar , i:: Integer = firstindex (a) )
75
107
if isascii (b)
76
108
_search (a,UInt8 (b),i)
77
109
else
@@ -80,41 +112,51 @@ function _search(a::ByteArray, b::AbstractChar, i::Integer = 1)
80
112
end
81
113
82
114
# Backward search for the character `pred.x` in `s`, starting at index `i`.
#
# Returns the index of the match or `nothing`. The search is done at the byte level
# with `_rsearch` on the first UTF-8 byte of the character; candidate positions are
# then confirmed with `isvalid` and `pred`.
function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
                  s::Union{String, SubString{String}}, i::Integer)
    c = pred.x
    b = first_utf8_byte(c)
    # For chars up to '\x7f' the result of the byte search is returned directly.
    c ≤ '\x7f' && return _rsearch(s, b, i)
    while true
        i = _rsearch(s, b, i)
        # Identity comparison is the idiomatic test for `nothing` and matches the
        # forward-search counterpart.
        i === nothing && return nothing
        # A matching byte only counts if it sits at a valid character index and
        # the character there satisfies the predicate.
        isvalid(s, i) && pred(s[i]) && return i
        i = prevind(s, i)
    end
end
94
126
95
- findlast (pred:: Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}} , a:: ByteArray ) =
96
- nothing_sentinel (_rsearch (a, pred. x))
127
# Equality search for a byte from the end of `a`: forwards to the three-argument
# `findprev`, beginning at `lastindex(a)`.
findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::DenseUInt8OrInt8) =
    findprev(pred, a, lastindex(a))
97
130
98
- findprev (pred:: Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}} , a:: ByteArray , i:: Integer ) =
99
- nothing_sentinel (_rsearch (a, pred. x, i))
131
# Backward equality search for an `Int8` in a signed byte array, starting at index `i`.
# The work is done by `_rsearch`, whose result is returned unchanged.
findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer) =
    _rsearch(a, pred.x, i)
100
134
101
- findlast (:: typeof (iszero), a:: ByteArray ) = nothing_sentinel (_rsearch (a, zero (UInt8)))
102
- findprev (:: typeof (iszero), a:: ByteArray , i:: Integer ) = nothing_sentinel (_rsearch (a, zero (UInt8), i))
135
# Backward equality search for a `UInt8` in an unsigned byte array, starting at
# index `i`. The work is done by `_rsearch`, whose result is returned unchanged.
findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer) =
    _rsearch(a, pred.x, i)
103
138
104
- function _rsearch (a:: Union{String,ByteArray} , b:: Union{Int8,UInt8} , i:: Integer = sizeof (a))
105
- if i < 1
106
- return i == 0 ? 0 : throw (BoundsError (a, i))
139
# Zero has the same bit pattern for Int8 and UInt8, so the byte-level reverse
# search is used for `iszero` whichever of the two element types the array has.
function findlast(::typeof(iszero), a::DenseUInt8OrInt8)
    return _rsearch(a, zero(UInt8))
end

function findprev(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer)
    return _rsearch(a, zero(UInt8), i)
end
142
+
143
# Backward byte search via C `memrchr`.
#
# Looks for byte `b` in `a` among the positions from `firstindex(a)` through `i`
# (default: the last byte index) and returns the index of the last match, or
# `nothing` when there is none.
# An `i` exactly one before the first index or one past the last byte index yields
# `nothing`; anything further out of range throws `BoundsError`.
function _rsearch(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = last_byteindex(a))
    first_idx = firstindex(a)
    last_idx = last_byteindex(a)
    if i < first_idx
        i == first_idx - 1 || throw(BoundsError(a, i))
        return nothing
    elseif i > last_idx
        i == last_idx + 1 || throw(BoundsError(a, i))
        return nothing
    end
    # Keep `a` rooted while its raw pointer is handed to C.
    GC.@preserve a begin
        base = pointer(a)
        hit = ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t),
                    base, b, i - first_idx + 1)
    end
    hit == C_NULL && return nothing
    # Convert the pointer offset back into an index relative to `firstindex(a)`.
    return (hit - base + first_idx) % Int
end
116
158
117
- function _rsearch (a:: ByteArray , b:: AbstractChar , i:: Integer = length (a))
159
+ function _rsearch (a:: DenseUInt8 , b:: AbstractChar , i:: Integer = length (a))
118
160
if isascii (b)
119
161
_rsearch (a,UInt8 (b),i)
120
162
else
@@ -224,18 +266,19 @@ end
224
266
225
267
# Membership test for a character in a string: true when `findfirst` locates it.
function in(c::AbstractChar, s::AbstractString)
    return !isnothing(findfirst(isequal(c), s))
end
226
268
227
- function _searchindex (s:: Union{AbstractString,ByteArray } ,
269
+ function _searchindex (s:: Union{AbstractString,DenseUInt8OrInt8 } ,
228
270
t:: Union{AbstractString,AbstractChar,Int8,UInt8} ,
229
271
i:: Integer )
272
+ sentinel = firstindex (s) - 1
230
273
x = Iterators. peel (t)
231
274
if isnothing (x)
232
- return 1 <= i <= nextind (s,lastindex (s)):: Int ? i :
275
+ return firstindex (s) <= i <= nextind (s,lastindex (s)):: Int ? i :
233
276
throw (BoundsError (s, i))
234
277
end
235
278
t1, trest = x
236
279
while true
237
280
i = findnext (isequal (t1),s,i)
238
- if i === nothing return 0 end
281
+ if i === nothing return sentinel end
239
282
ii = nextind (s, i):: Int
240
283
a = Iterators. Stateful (trest)
241
284
matched = all (splat (== ), zip (SubString (s, ii), a))
@@ -509,9 +552,8 @@ julia> findall(UInt8[1,2], UInt8[1,2,3,1,2])
509
552
!!! compat "Julia 1.3"
510
553
This method requires at least Julia 1.3.
511
554
"""
512
-
513
- function findall (t:: Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}} ,
514
- s:: Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}} ,
555
+ function findall (t:: Union{AbstractString, AbstractPattern, AbstractVector{UInt8}} ,
556
+ s:: Union{AbstractString, AbstractPattern, AbstractVector{UInt8}} ,
515
557
; overlap:: Bool = false )
516
558
found = UnitRange{Int}[]
517
559
i, e = firstindex (s), lastindex (s)
@@ -564,7 +606,7 @@ function _rsearchindex(s::AbstractString,
564
606
end
565
607
end
566
608
567
- function _rsearchindex (s:: String , t:: String , i:: Integer )
609
+ function _rsearchindex (s:: Union{ String, SubString{String}} , t:: Union{ String, SubString{String}} , i:: Integer )
568
610
# Check for fast case of a single byte
569
611
if lastindex (t) == 1
570
612
return something (findprev (isequal (t[1 ]), s, i), 0 )
0 commit comments