Skip to content

Commit 59ecd36

Browse files
committed
[GR-40333] Adding MatchData#match and MatchData#match_length
PullRequest: truffleruby/3686
2 parents 4edc01d + cf42ea7 commit 59ecd36

File tree

8 files changed

+193
-149
lines changed

8 files changed

+193
-149
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ Compatibility:
9191
* Add `objspace/trace` file (#2733, @andrykonchin).
9292
* Add `Process._fork` (#2733, @horakivo).
9393
* Update to JCodings 1.0.58 and Joni 2.1.44 (@eregon).
94+
* Add `MatchData#match` and `MatchData#match_length` (#2733, @horakivo).
9495

9596
Performance:
9697

spec/tags/core/matchdata/match_length_tags.txt

Lines changed: 0 additions & 3 deletions
This file was deleted.

spec/tags/core/matchdata/match_tags.txt

Lines changed: 0 additions & 3 deletions
This file was deleted.

src/main/java/org/truffleruby/core/CoreLibrary.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,7 @@ public boolean isTruffleBootMainMethod(SharedMethodInfo info) {
10131013
"/core/truffle/ctype.rb",
10141014
"/core/truffle/integer_operations.rb",
10151015
"/core/integer.rb",
1016+
"/core/match_data.rb",
10161017
"/core/regexp.rb",
10171018
"/core/transcoding.rb",
10181019
"/core/encoding.rb",
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright (c) 2014, 2023 Oracle and/or its affiliates. All rights reserved. This
4+
# code is released under a tri EPL/GPL/LGPL license. You can use it,
5+
# redistribute it and/or modify it under the terms of the:
6+
#
7+
# Eclipse Public License version 2.0, or
8+
# GNU General Public License version 2, or
9+
# GNU Lesser General Public License version 2.1.
10+
11+
# Copyright (c) 2007-2015, Evan Phoenix and contributors
12+
# All rights reserved.
13+
#
14+
# Redistribution and use in source and binary forms, with or without
15+
# modification, are permitted provided that the following conditions are met:
16+
#
17+
# * Redistributions of source code must retain the above copyright notice, this
18+
# list of conditions and the following disclaimer.
19+
# * Redistributions in binary form must reproduce the above copyright notice
20+
# this list of conditions and the following disclaimer in the documentation
21+
# and/or other materials provided with the distribution.
22+
# * Neither the name of Rubinius nor the names of its contributors
23+
# may be used to endorse or promote products derived from this software
24+
# without specific prior written permission.
25+
#
26+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
30+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36+
37+
class MatchData
38+
class << self
39+
# Prevent allocating MatchData, like MRI 2.7+, so we don't need to check if it's initialized
40+
undef_method :allocate
41+
end
42+
43+
def offset(idx)
44+
[self.begin(idx), self.end(idx)]
45+
end
46+
47+
def ==(other)
48+
return true if equal?(other)
49+
50+
Primitive.object_kind_of?(other, MatchData) &&
51+
string == other.string &&
52+
regexp == other.regexp &&
53+
captures == other.captures
54+
end
55+
alias_method :eql?, :==
56+
57+
def string
58+
Primitive.match_data_get_source(self).dup.freeze
59+
end
60+
61+
def captures
62+
to_a[1..-1]
63+
end
64+
65+
def names
66+
regexp.names
67+
end
68+
69+
def named_captures
70+
names.collect { |name| [name, self[name]] }.to_h
71+
end
72+
73+
def begin(index)
74+
backref = if String === index || Symbol === index
75+
names_to_backref = Hash[Primitive.regexp_names(self.regexp)]
76+
names_to_backref[index.to_sym].last
77+
else
78+
Truffle::Type.coerce_to(index, Integer, :to_int)
79+
end
80+
81+
82+
Primitive.match_data_begin(self, backref)
83+
end
84+
85+
def end(index)
86+
backref = if String === index || Symbol === index
87+
names_to_backref = Hash[Primitive.regexp_names(self.regexp)]
88+
names_to_backref[index.to_sym].last
89+
else
90+
Truffle::Type.coerce_to(index, Integer, :to_int)
91+
end
92+
93+
94+
Primitive.match_data_end(self, backref)
95+
end
96+
97+
def inspect
98+
str = "#<MatchData \"#{self[0]}\""
99+
idx = 0
100+
captures.zip(names) do |capture, name|
101+
idx += 1
102+
str << " #{name || idx}:#{capture.inspect}"
103+
end
104+
"#{str}>"
105+
end
106+
107+
def values_at(*indexes)
108+
out = []
109+
size = self.size
110+
111+
indexes.each do |elem|
112+
if Primitive.object_kind_of?(elem, String) || Primitive.object_kind_of?(elem, Symbol)
113+
out << self[elem]
114+
elsif Primitive.object_kind_of?(elem, Range)
115+
start, length = Primitive.range_normalized_start_length(elem, size)
116+
finish = start + length - 1
117+
118+
raise RangeError, "#{elem} out of range" if start < 0
119+
next if finish < start # ignore empty ranges
120+
121+
finish_in_bounds = [finish, size - 1].min
122+
start.upto(finish_in_bounds) do |index|
123+
out << self[index]
124+
end
125+
126+
(finish_in_bounds + 1).upto(finish) { out << nil }
127+
else
128+
index = Primitive.rb_num2int(elem)
129+
if index >= size || index < -size
130+
out << nil
131+
else
132+
out << self[index]
133+
end
134+
end
135+
end
136+
137+
out
138+
end
139+
140+
def match(n)
141+
# Similar, but #match accepts only single index/name, but not a range or an optional length.
142+
number = Truffle::Type.rb_check_convert_type(n, Integer, :to_int)
143+
return self[number] if number
144+
# To convert the last type (String) we used rb_convert_type instead of rb_check_convert_type which throws an exception
145+
name = Truffle::Type.rb_check_convert_type(n, Symbol, :to_sym) || Truffle::Type.rb_convert_type(n, String, :to_str)
146+
self[name]
147+
end
148+
149+
def match_length(n)
150+
match(n)&.length
151+
end
152+
153+
def to_s
154+
self[0]
155+
end
156+
end
157+
158+
Truffle::KernelOperations.define_hooked_variable(
159+
:$~,
160+
-> s { Primitive.regexp_last_match_get(s) },
161+
Truffle::RegexpOperations::LAST_MATCH_SET)
162+
163+
Truffle::KernelOperations.define_hooked_variable(
164+
:'$`',
165+
-> s { match = Primitive.regexp_last_match_get(s)
166+
match.pre_match if match },
167+
-> { raise SyntaxError, "Can't set variable $`" },
168+
-> s { 'global-variable' if Primitive.regexp_last_match_get(s) })
169+
170+
Truffle::KernelOperations.define_hooked_variable(
171+
:"$'",
172+
-> s { match = Primitive.regexp_last_match_get(s)
173+
match.post_match if match },
174+
-> { raise SyntaxError, "Can't set variable $'" },
175+
-> s { 'global-variable' if Primitive.regexp_last_match_get(s) })
176+
177+
Truffle::KernelOperations.define_hooked_variable(
178+
:'$&',
179+
-> s { match = Primitive.regexp_last_match_get(s)
180+
match[0] if match },
181+
-> { raise SyntaxError, "Can't set variable $&" },
182+
-> s { 'global-variable' if Primitive.regexp_last_match_get(s) })
183+
184+
Truffle::KernelOperations.define_hooked_variable(
185+
:'$+',
186+
-> s { match = Primitive.regexp_last_match_get(s)
187+
match.captures.reverse.find { |m| !Primitive.nil?(m) } if match },
188+
-> { raise SyntaxError, "Can't set variable $+" },
189+
-> s { 'global-variable' if Primitive.regexp_last_match_get(s) })

src/main/ruby/truffleruby/core/regexp.rb

Lines changed: 0 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -287,144 +287,3 @@ def names
287287
end
288288

289289
end
290-
291-
class MatchData
292-
class << self
293-
# Prevent allocating MatchData, like MRI 2.7+, so we don't need to check if it's initialized
294-
undef_method :allocate
295-
end
296-
297-
def offset(idx)
298-
[self.begin(idx), self.end(idx)]
299-
end
300-
301-
def ==(other)
302-
return true if equal?(other)
303-
304-
Primitive.object_kind_of?(other, MatchData) &&
305-
string == other.string &&
306-
regexp == other.regexp &&
307-
captures == other.captures
308-
end
309-
alias_method :eql?, :==
310-
311-
def string
312-
Primitive.match_data_get_source(self).dup.freeze
313-
end
314-
315-
def captures
316-
to_a[1..-1]
317-
end
318-
319-
def names
320-
regexp.names
321-
end
322-
323-
def named_captures
324-
names.collect { |name| [name, self[name]] }.to_h
325-
end
326-
327-
def begin(index)
328-
backref = if String === index || Symbol === index
329-
names_to_backref = Hash[Primitive.regexp_names(self.regexp)]
330-
names_to_backref[index.to_sym].last
331-
else
332-
Truffle::Type.coerce_to(index, Integer, :to_int)
333-
end
334-
335-
336-
Primitive.match_data_begin(self, backref)
337-
end
338-
339-
def end(index)
340-
backref = if String === index || Symbol === index
341-
names_to_backref = Hash[Primitive.regexp_names(self.regexp)]
342-
names_to_backref[index.to_sym].last
343-
else
344-
Truffle::Type.coerce_to(index, Integer, :to_int)
345-
end
346-
347-
348-
Primitive.match_data_end(self, backref)
349-
end
350-
351-
def inspect
352-
str = "#<MatchData \"#{self[0]}\""
353-
idx = 0
354-
captures.zip(names) do |capture, name|
355-
idx += 1
356-
str << " #{name || idx}:#{capture.inspect}"
357-
end
358-
"#{str}>"
359-
end
360-
361-
def values_at(*indexes)
362-
out = []
363-
size = self.size
364-
365-
indexes.each do |elem|
366-
if Primitive.object_kind_of?(elem, String) || Primitive.object_kind_of?(elem, Symbol)
367-
out << self[elem]
368-
elsif Primitive.object_kind_of?(elem, Range)
369-
start, length = Primitive.range_normalized_start_length(elem, size)
370-
finish = start + length - 1
371-
372-
raise RangeError, "#{elem} out of range" if start < 0
373-
next if finish < start # ignore empty ranges
374-
375-
finish_in_bounds = [finish, size - 1].min
376-
start.upto(finish_in_bounds) do |index|
377-
out << self[index]
378-
end
379-
380-
(finish_in_bounds + 1).upto(finish) { out << nil }
381-
else
382-
index = Primitive.rb_num2int(elem)
383-
if index >= size || index < -size
384-
out << nil
385-
else
386-
out << self[index]
387-
end
388-
end
389-
end
390-
391-
out
392-
end
393-
394-
def to_s
395-
self[0]
396-
end
397-
end
398-
399-
Truffle::KernelOperations.define_hooked_variable(
400-
:$~,
401-
-> s { Primitive.regexp_last_match_get(s) },
402-
Truffle::RegexpOperations::LAST_MATCH_SET)
403-
404-
Truffle::KernelOperations.define_hooked_variable(
405-
:'$`',
406-
-> s { match = Primitive.regexp_last_match_get(s)
407-
match.pre_match if match },
408-
-> { raise SyntaxError, "Can't set variable $`" },
409-
-> s { 'global-variable' if Primitive.regexp_last_match_get(s) })
410-
411-
Truffle::KernelOperations.define_hooked_variable(
412-
:"$'",
413-
-> s { match = Primitive.regexp_last_match_get(s)
414-
match.post_match if match },
415-
-> { raise SyntaxError, "Can't set variable $'" },
416-
-> s { 'global-variable' if Primitive.regexp_last_match_get(s) })
417-
418-
Truffle::KernelOperations.define_hooked_variable(
419-
:'$&',
420-
-> s { match = Primitive.regexp_last_match_get(s)
421-
match[0] if match },
422-
-> { raise SyntaxError, "Can't set variable $&" },
423-
-> s { 'global-variable' if Primitive.regexp_last_match_get(s) })
424-
425-
Truffle::KernelOperations.define_hooked_variable(
426-
:'$+',
427-
-> s { match = Primitive.regexp_last_match_get(s)
428-
match.captures.reverse.find { |m| !Primitive.nil?(m) } if match },
429-
-> { raise SyntaxError, "Can't set variable $+" },
430-
-> s { 'global-variable' if Primitive.regexp_last_match_get(s) })
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
exclude :test_field_size_limit_controls_lookahead, "transient timeout"
2+
exclude :test_max_field_size_controls_lookahead, "transient timeout"
3+
exclude :test_the_parse_fails_fast_when_it_can_for_unquoted_fields, "transient timeout"

test/mri/excludes/TestRegexp.rb

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,3 @@
3737
exclude :test_rindex_regexp, "needs investigation"
3838
exclude :test_yoshidam_net_20041111_1, "needs investigation"
3939
exclude :test_match_control_meta_escape, "<0> expected but was"
40-
exclude :test_match_matchsubstring, "NoMethodError: undefined method `match' for #<MatchData \"HX1138\" 1:\"H\" 2:\"X\" 3:\"113\" 4:\"8\" 5:nil>"
41-
exclude :test_match_match_length, "NoMethodError: undefined method `match_length' for #<MatchData \"HX1138\" 1:\"H\" 2:\"X\" 3:\"113\" 4:\"8\" 5:nil>"

0 commit comments

Comments
 (0)