Skip to content

Commit 7abd5ad

Browse files
PCRE: use correct type for error constants (Cint), audit C calls (#35773)
1 parent a4641c8 commit 7abd5ad

File tree

2 files changed

+49
-34
lines changed

2 files changed

+49
-34
lines changed

base/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ else
2222
endif
2323

2424
$(BUILDDIR)/pcre_h.jl: $(PCRE_INCL_PATH)
25-
@$(call PRINT_PERL, $(CPP) -D PCRE2_CODE_UNIT_WIDTH=8 -dM $< | perl -nle '/^\s*#define\s+PCRE2_(\w*)\s*\(?($(PCRE_CONST))\)?u?\s*$$/ and print "const $$1 = $$2 % UInt32"' | LC_ALL=C sort > $@)
25+
@$(call PRINT_PERL, $(CPP) -D PCRE2_CODE_UNIT_WIDTH=8 -dM $< | perl -nle '/^\s*#define\s+PCRE2_(\w*)\s*\(?($(PCRE_CONST))\)?u?\s*$$/ and print index($$1, "ERROR_") == 0 ? "const $$1 = Cint($$2)" : "const $$1 = UInt32($$2)"' | LC_ALL=C sort > $@)
2626

2727
$(BUILDDIR)/errno_h.jl:
2828
@$(call PRINT_PERL, echo '#include <errno.h>' | $(CPP) -dM - | perl -nle 'print "const $$1 = Int32($$2)" if /^#define\s+(E\w+)\s+(\d+)\s*$$/' | LC_ALL=C sort > $@)

base/pcre.jl

Lines changed: 48 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,16 @@ module PCRE
66

77
import ..RefValue
88

9-
include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "pcre_h.jl")) # include($BUILDROOT/base/pcre_h.jl)
9+
# include($BUILDROOT/base/pcre_h.jl)
10+
include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "pcre_h.jl"))
1011

1112
const PCRE_LIB = "libpcre2-8"
1213

1314
function create_match_context()
1415
JIT_STACK_START_SIZE = 32768
1516
JIT_STACK_MAX_SIZE = 1048576
1617
jit_stack = ccall((:pcre2_jit_stack_create_8, PCRE_LIB), Ptr{Cvoid},
17-
(Cint, Cint, Ptr{Cvoid}),
18+
(Csize_t, Csize_t, Ptr{Cvoid}),
1819
JIT_STACK_START_SIZE, JIT_STACK_MAX_SIZE, C_NULL)
1920
ctx = ccall((:pcre2_match_context_create_8, PCRE_LIB),
2021
Ptr{Cvoid}, (Ptr{Cvoid},), C_NULL)
@@ -90,22 +91,22 @@ const UNSET = ~Csize_t(0) # Indicates that an output vector element is unset
9091

9192
function info(regex::Ptr{Cvoid}, what::Integer, ::Type{T}) where T
9293
buf = RefValue{T}()
93-
ret = ccall((:pcre2_pattern_info_8, PCRE_LIB), Int32,
94-
(Ptr{Cvoid}, Int32, Ptr{Cvoid}),
95-
regex, what, buf) % UInt32
94+
ret = ccall((:pcre2_pattern_info_8, PCRE_LIB), Cint,
95+
(Ptr{Cvoid}, UInt32, Ptr{Cvoid}),
96+
regex, what, buf)
9697
if ret != 0
97-
error(ret == ERROR_NULL ? "NULL regex object" :
98-
ret == ERROR_BADMAGIC ? "invalid regex object" :
99-
ret == ERROR_BADOPTION ? "invalid option flags" :
100-
"unknown error $ret")
98+
error(ret == ERROR_NULL ? "PCRE error: NULL regex object" :
99+
ret == ERROR_BADMAGIC ? "PCRE error: invalid regex object" :
100+
ret == ERROR_BADOPTION ? "PCRE error: invalid option flags" :
101+
"PCRE error: unknown error ($ret)")
101102
end
102-
buf[]
103+
return buf[]
103104
end
104105

105106
function ovec_length(match_data)
106107
n = ccall((:pcre2_get_ovector_count_8, PCRE_LIB), UInt32,
107108
(Ptr{Cvoid},), match_data)
108-
return 2n
109+
return 2Int(n)
109110
end
110111

111112
function ovec_ptr(match_data)
@@ -115,18 +116,23 @@ function ovec_ptr(match_data)
115116
end
116117

117118
function compile(pattern::AbstractString, options::Integer)
119+
if !(pattern isa Union{String,SubString{String}})
120+
pattern = String(pattern)
121+
end
118122
errno = RefValue{Cint}(0)
119123
erroff = RefValue{Csize_t}(0)
120124
re_ptr = ccall((:pcre2_compile_8, PCRE_LIB), Ptr{Cvoid},
121125
(Ptr{UInt8}, Csize_t, UInt32, Ref{Cint}, Ref{Csize_t}, Ptr{Cvoid}),
122-
pattern, sizeof(pattern), options, errno, erroff, C_NULL)
123-
re_ptr == C_NULL && error("PCRE compilation error: $(err_message(errno[])) at offset $(erroff[])")
124-
re_ptr
126+
pattern, ncodeunits(pattern), options, errno, erroff, C_NULL)
127+
if re_ptr == C_NULL
128+
error("PCRE compilation error: $(err_message(errno[])) at offset $(erroff[])")
129+
end
130+
return re_ptr
125131
end
126132

127133
function jit_compile(regex::Ptr{Cvoid})
128134
errno = ccall((:pcre2_jit_compile_8, PCRE_LIB), Cint,
129-
(Ptr{Cvoid}, UInt32), regex, JIT_COMPLETE) % UInt32
135+
(Ptr{Cvoid}, UInt32), regex, JIT_COMPLETE)
130136
errno == 0 && return true
131137
errno == ERROR_JIT_BADOPTION && return false
132138
error("PCRE JIT error: $(err_message(errno))")
@@ -144,20 +150,25 @@ free_jit_stack(stack) =
144150
free_match_context(context) =
145151
ccall((:pcre2_match_context_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), context)
146152

147-
function err_message(errno)
148-
buffer = Vector{UInt8}(undef, 256)
149-
ccall((:pcre2_get_error_message_8, PCRE_LIB), Cvoid,
150-
(UInt32, Ptr{UInt8}, Csize_t), errno, buffer, sizeof(buffer))
151-
GC.@preserve buffer unsafe_string(pointer(buffer))
153+
function err_message(errno::Integer)
154+
buffer = Vector{UInt8}(undef, 1024)
155+
ret = ccall((:pcre2_get_error_message_8, PCRE_LIB), Cint,
156+
(Cint, Ptr{UInt8}, Csize_t), errno, buffer, length(buffer))
157+
ret == ERROR_BADDATA && error("PCRE error: invalid errno ($errno)")
158+
# TODO: seems like there should be a better way to get this string
159+
return GC.@preserve buffer unsafe_string(pointer(buffer))
152160
end
153161

154162
function exec(re, subject, offset, options, match_data)
163+
if !(subject isa Union{String,SubString{String}})
164+
subject = String(subject)
165+
end
155166
rc = ccall((:pcre2_match_8, PCRE_LIB), Cint,
156-
(Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, Cuint, Ptr{Cvoid}, Ptr{Cvoid}),
157-
re, subject, sizeof(subject), offset, options, match_data, get_local_match_context())
167+
(Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, UInt32, Ptr{Cvoid}, Ptr{Cvoid}),
168+
re, subject, ncodeunits(subject), offset, options, match_data, get_local_match_context())
158169
# rc == -1 means no match, -2 means partial match.
159170
rc < -2 && error("PCRE.exec error: $(err_message(rc))")
160-
rc >= 0
171+
return rc >= 0
161172
end
162173

163174
function exec_r(re, subject, offset, options)
@@ -174,21 +185,25 @@ function exec_r_data(re, subject, offset, options)
174185
end
175186

176187
function create_match_data(re)
177-
ccall((:pcre2_match_data_create_from_pattern_8, PCRE_LIB),
178-
Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}), re, C_NULL)
188+
p = ccall((:pcre2_match_data_create_from_pattern_8, PCRE_LIB),
189+
Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}), re, C_NULL)
190+
p == C_NULL && error("PCRE error: could not allocate memory")
191+
return p
179192
end
180193

181194
function substring_number_from_name(re, name)
182-
ccall((:pcre2_substring_number_from_name_8, PCRE_LIB), Cint,
183-
(Ptr{Cvoid}, Cstring), re, name)
195+
n = ccall((:pcre2_substring_number_from_name_8, PCRE_LIB), Cint,
196+
(Ptr{Cvoid}, Cstring), re, name)
197+
n < 0 && error("PCRE error: $(err_message(n))")
198+
return Int(n)
184199
end
185200

186201
function substring_length_bynumber(match_data, number)
187202
s = RefValue{Csize_t}()
188203
rc = ccall((:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
189-
(Ptr{Cvoid}, UInt32, Ref{Csize_t}), match_data, number, s)
204+
(Ptr{Cvoid}, Cint, Ref{Csize_t}), match_data, number, s)
190205
rc < 0 && error("PCRE error: $(err_message(rc))")
191-
convert(Int, s[])
206+
return Int(s[])
192207
end
193208

194209
function substring_copy_bynumber(match_data, number, buf, buf_size)
@@ -197,15 +212,15 @@ function substring_copy_bynumber(match_data, number, buf, buf_size)
197212
(Ptr{Cvoid}, UInt32, Ptr{UInt8}, Ref{Csize_t}),
198213
match_data, number, buf, s)
199214
rc < 0 && error("PCRE error: $(err_message(rc))")
200-
convert(Int, s[])
215+
return Int(s[])
201216
end
202217

203218
function capture_names(re)
204219
name_count = info(re, INFO_NAMECOUNT, UInt32)
205220
name_entry_size = info(re, INFO_NAMEENTRYSIZE, UInt32)
206221
nametable_ptr = info(re, INFO_NAMETABLE, Ptr{UInt8})
207-
names = Dict{Int, String}()
208-
for i=1:name_count
222+
names = Dict{Int,String}()
223+
for i = 1:name_count
209224
offset = (i-1)*name_entry_size + 1
210225
# The capture group index corresponding to name 'i' is stored as a
211226
# big-endian 16-bit value.
@@ -216,7 +231,7 @@ function capture_names(re)
216231
# after the index.
217232
names[idx] = unsafe_string(nametable_ptr+offset+1)
218233
end
219-
names
234+
return names
220235
end
221236

222237
end # module

0 commit comments

Comments
 (0)