26
26
27
27
# Modifications made by the Truffle team are:
28
28
#
29
- # Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. This
29
+ # Copyright (c) 2017, 2019 Oracle and/or its affiliates. All rights reserved. This
30
30
# code is released under a tri EPL/GPL/LGPL license. You can use it,
31
31
# redistribute it and/or modify it under the terms of the:
32
32
#
35
35
# GNU Lesser General Public License version 2.1.
36
36
37
37
38
- class ScanError < StandardError ; end
38
+ class ScanError < StandardError
39
+ end
39
40
40
41
class StringScanner
42
+
41
43
Id = 'None$Id' . freeze
42
44
Version = '1.0.0' . freeze
43
45
44
46
attr_reader :pos , :match , :prev_pos
45
47
48
+ def initialize ( string , dup = false )
49
+ if string . instance_of? String
50
+ @original = string
51
+ @string = string
52
+ else
53
+ @original = StringValue ( string )
54
+ @string = String . new @original
55
+ end
56
+
57
+ reset_state
58
+ end
59
+
46
60
def pos = ( n )
47
61
n = Integer ( n )
48
62
@@ -59,12 +73,10 @@ def pos=(n)
59
73
alias_method :pointer= , :pos=
60
74
61
75
def []( n )
62
- # Truffle: no eager check
63
76
if @match
64
- # Truffle: follow MRI
65
77
raise TypeError , "no implicit conversion of #{ n . class } into Integer" if Range === n
66
78
str = @match [ n ]
67
- str . taint if @string . tainted? # Truffle: propagate taint
79
+ str . taint if @string . tainted?
68
80
str
69
81
end
70
82
end
@@ -75,17 +87,16 @@ def bol?
75
87
76
88
alias_method :beginning_of_line? , :bol?
77
89
78
- # Truffle: added
79
90
def charpos
80
91
@string . byteslice ( 0 , @pos ) . length
81
92
end
82
93
83
94
def check ( pattern )
84
- _scan pattern , false , true , true
95
+ scan_internal pattern , false , true , true
85
96
end
86
97
87
98
def check_until ( pattern )
88
- _scan pattern , false , true , false
99
+ scan_internal pattern , false , true , false
89
100
end
90
101
91
102
def clear
@@ -105,17 +116,27 @@ def empty?
105
116
end
106
117
107
118
def eos?
108
- raise ArgumentError , 'uninitialized StringScanner object' unless @string # Truffle
119
+ raise ArgumentError , 'uninitialized StringScanner object' unless @string
109
120
@pos >= @string . bytesize
110
121
end
111
122
112
123
def exist? ( pattern )
113
- _scan pattern , false , false , false
124
+ scan_internal pattern , false , false , false
114
125
end
115
126
116
127
def get_byte
117
- # Truffle: correct get_byte with non-ascii strings
118
- _get_byte
128
+ if eos?
129
+ @match = nil
130
+ return nil
131
+ end
132
+
133
+ # We need to match one byte, regardless of the string encoding
134
+ @match = Truffle . invoke_primitive :regexp_search_from_binary , /./mn , @string , pos
135
+
136
+ @prev_pos = @pos
137
+ @pos += 1
138
+
139
+ @string . byteslice ( @prev_pos , 1 )
119
140
end
120
141
121
142
def getbyte
@@ -127,19 +148,6 @@ def getch
127
148
scan ( /./m )
128
149
end
129
150
130
- def initialize ( string , dup = false )
131
- if string . instance_of? String
132
- @original = string
133
- @string = string
134
- else
135
- @original = StringValue ( string )
136
- @string = String . new @original
137
- end
138
-
139
- reset_state
140
- end
141
-
142
- # Truffle: fix to use self.class instead of hard-coded StringScanner
143
151
def inspect
144
152
if defined? @string
145
153
if eos?
@@ -172,13 +180,13 @@ def inspect
172
180
end
173
181
174
182
def match? ( pattern )
175
- _scan pattern , false , false , true
183
+ scan_internal pattern , false , false , true
176
184
end
177
185
178
186
def matched
179
187
if @match
180
188
matched = @match . to_s
181
- matched . taint if @string . tainted? # Truffle: propagate taint
189
+ matched . taint if @string . tainted?
182
190
matched
183
191
end
184
192
end
@@ -233,31 +241,31 @@ def restsize
233
241
end
234
242
235
243
def scan ( pattern )
236
- _scan pattern , true , true , true
244
+ scan_internal pattern , true , true , true
237
245
end
238
246
239
247
def scan_until ( pattern )
240
- _scan pattern , true , true , false
248
+ scan_internal pattern , true , true , false
241
249
end
242
250
243
251
def scan_full ( pattern , advance_pos , getstr )
244
- _scan pattern , advance_pos , getstr , true
252
+ scan_internal pattern , advance_pos , getstr , true
245
253
end
246
254
247
255
def search_full ( pattern , advance_pos , getstr )
248
- _scan pattern , advance_pos , getstr , false
256
+ scan_internal pattern , advance_pos , getstr , false
249
257
end
250
258
251
259
def self . must_C_version
252
260
self
253
261
end
254
262
255
263
def skip ( pattern )
256
- _scan pattern , true , false , true
264
+ scan_internal pattern , true , false , true
257
265
end
258
266
259
267
def skip_until ( pattern )
260
- _scan pattern , true , false , false
268
+ scan_internal pattern , true , false , false
261
269
end
262
270
263
271
def string
@@ -293,8 +301,6 @@ def unscan
293
301
def peek ( len )
294
302
raise ArgumentError if len < 0
295
303
return '' if len . zero?
296
-
297
- # Truffle: correctly use byte offsets and no rescue
298
304
@string . byteslice ( pos , len )
299
305
end
300
306
@@ -303,54 +309,37 @@ def peep(len)
303
309
peek len
304
310
end
305
311
306
- def _scan ( pattern , advance_pos , getstr , headonly )
312
+ def scan_internal ( pattern , advance_pos , getstr , headonly )
307
313
unless pattern . kind_of? Regexp
308
314
raise TypeError , "bad pattern argument: #{ pattern . inspect } "
309
315
end
310
- raise ArgumentError , 'uninitialized StringScanner object' unless @string # Truffle
311
-
312
- @match = nil
313
-
314
- if headonly
315
- # NOTE - match_start is an Oniguruma feature that Rubinius exposes.
316
- # We use it here to avoid creating a new Regexp with '^' prepended.
317
- @match = pattern . match_start @string , @pos
318
- else
319
- # NOTE - search_from is an Oniguruma feature that Rubinius exposes.
320
- # We use it so we can begin the search in the middle of the string
321
- @match = pattern . search_from @string , @pos
316
+ raise ArgumentError , 'uninitialized StringScanner object' unless @string
317
+
318
+ # If the pattern already starts with a ^, and we're not at the start of
319
+ # the string, then we can't match as normal because match_from still tries
320
+ # to match the ^ at position 0 even though it's looking from point pos
321
+ # onwards, even if headonly is set. Instead, remove the ^. This could
322
+ # possibly be fixed in Joni instead, or maybe there is already some option
323
+ # we're not using.
324
+
325
+ if pattern . source [ 0 ] == '^' && pos > 0
326
+ pattern = Regexp . new ( pattern . source [ 1 ..- 1 ] )
327
+ headonly = true
322
328
end
323
329
330
+ @match = pattern . match_onwards @string , pos , headonly
324
331
return nil unless @match
325
332
326
333
fin = @match . byte_end ( 0 )
327
334
328
335
@prev_pos = @pos
329
-
330
336
@pos = fin if advance_pos
331
337
332
338
width = fin - @prev_pos
333
-
334
339
return width unless getstr
335
340
336
341
@string . byteslice ( @prev_pos , width )
337
342
end
338
- private :_scan
339
-
340
- # Truffle: correct get_byte with non-ascii strings
341
- def _get_byte
342
- if eos?
343
- @match = nil
344
- return nil
345
- end
346
-
347
- # We need to match one byte, regardless of the string encoding
348
- @match = Truffle . invoke_primitive :regexp_search_from_binary , /./mn , @string , pos
343
+ private :scan_internal
349
344
350
- @prev_pos = @pos
351
- @pos += 1
352
-
353
- @string . byteslice ( @prev_pos , 1 )
354
- end
355
- private :_get_byte
356
345
end
0 commit comments