Skip to content

Commit 2409d97

Browse files
committed
Turn RegexpOperations.match_in_region into multiple simpler Primitives
1 parent 88f7f54 commit 2409d97

File tree

7 files changed

+140
-68
lines changed

7 files changed

+140
-68
lines changed

lib/truffle/strscan.rb

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,11 @@ def peep(len)
386386
md = scan_internal_string_pattern(pattern, only_match_at_start)
387387
else
388388
start = @fixed_anchor ? 0 : @pos
389-
md = Truffle::RegexpOperations.match_in_region pattern, @string, @pos, @string.bytesize, only_match_at_start, start
389+
if only_match_at_start
390+
md = Primitive.regexp_match_at_start(pattern, @string, @pos, start)
391+
else
392+
md = Primitive.regexp_search_with_start(pattern, @string, @pos, start)
393+
end
390394
end
391395

392396
if md

spec/truffle/splitting_spec.rb

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,6 @@
3535
Regexp.instance_method(:match?),
3636
Truffle::RegexpOperations.method(:match),
3737
Truffle::RegexpOperations.method(:match?),
38-
Truffle::RegexpOperations.method(:search_region),
39-
Truffle::RegexpOperations.method(:match_in_region),
4038
4139
String.instance_method(:[]),
4240
Truffle::StringOperations.method(:subpattern),

src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java

Lines changed: 122 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -782,17 +782,62 @@ private <K, V> void buildAndSetDistributionHash(HashStoreLibrary hashStoreLibrar
782782
}
783783
}
784784

785-
@Primitive(name = "regexp_match_in_region", lowerFixnum = { 2, 3, 5 })
786-
public abstract static class MatchInRegionNode extends PrimitiveArrayArgumentsNode {
785+
@Primitive(name = "regexp_search", lowerFixnum = 2)
786+
public abstract static class RegexpSearchNode extends PrimitiveArrayArgumentsNode {
787+
@Specialization
788+
Object match(RubyRegexp regexp, Object string, int from,
789+
@Cached RubyStringLibrary libString,
790+
@Cached MatchInRegionNode matchInRegionNode) {
791+
int to = libString.byteLength(this, string);
792+
return matchInRegionNode.execute(regexp, string, from, to, false, 0, true);
793+
}
794+
}
787795

788-
@NeverDefault
789-
public static MatchInRegionNode create() {
790-
return TruffleRegexpNodesFactory.MatchInRegionNodeFactory.create(null);
796+
@Primitive(name = "regexp_search?", lowerFixnum = 2)
797+
public abstract static class RegexpSearchPredicateNode extends PrimitiveArrayArgumentsNode {
798+
@Specialization
799+
boolean match(RubyRegexp regexp, Object string, int from,
800+
@Cached RubyStringLibrary libString,
801+
@Cached MatchInRegionNode matchInRegionNode) {
802+
int to = libString.byteLength(this, string);
803+
return (boolean) matchInRegionNode.execute(regexp, string, from, to, false, 0, false);
804+
}
805+
}
806+
807+
@Primitive(name = "regexp_search_backward", lowerFixnum = 2)
808+
public abstract static class RegexpSearchBackwardNode extends PrimitiveArrayArgumentsNode {
809+
@Specialization
810+
Object match(RubyRegexp regexp, Object string, int end,
811+
@Cached MatchInRegionNode matchInRegionNode) {
812+
return matchInRegionNode.execute(regexp, string, end, 0, false, 0, true);
813+
}
814+
}
815+
816+
@Primitive(name = "regexp_search_with_start", lowerFixnum = { 2, 3 }, isPublic = true)
817+
public abstract static class RegexpSearchWithStartNode extends PrimitiveArrayArgumentsNode {
818+
@Specialization
819+
Object match(RubyRegexp regexp, Object string, int from, int start,
820+
@Cached RubyStringLibrary libString,
821+
@Cached MatchInRegionNode matchInRegionNode) {
822+
int to = libString.byteLength(this, string);
823+
return matchInRegionNode.execute(regexp, string, from, to, false, start, true);
791824
}
825+
}
792826

793-
public abstract Object executeMatchInRegion(RubyRegexp regexp, Object string, int from, int to,
794-
boolean onlyMatchAtStart, int start, boolean createMatchData);
827+
@Primitive(name = "regexp_match_at_start", lowerFixnum = { 2, 3 }, isPublic = true)
828+
public abstract static class RegexpMatchAtStartNode extends PrimitiveArrayArgumentsNode {
829+
@Specialization
830+
Object match(RubyRegexp regexp, Object string, int from, int start,
831+
@Cached RubyStringLibrary libString,
832+
@Cached MatchInRegionNode matchInRegionNode) {
833+
int to = libString.byteLength(this, string);
834+
return matchInRegionNode.execute(regexp, string, from, to, true, start, true);
835+
}
836+
}
795837

838+
/** This node has many complicated arguments, so instead of exposing it as a Primitive we expose multiple simpler
839+
* Primitives. */
840+
public abstract static class MatchInRegionNode extends RubyBaseNode {
796841
/** Matches a regular expression against a string over the specified range of characters.
797842
*
798843
* @param regexp The regexp to match
@@ -806,12 +851,56 @@ public abstract Object executeMatchInRegion(RubyRegexp regexp, Object string, in
806851
* @param onlyMatchAtStart Whether to only match at the beginning of the string, if false then the regexp can
807852
* have any amount of prematch.
808853
*
809-
* @param start The position within the string which the matcher should consider the start. Setting this to the
810-
* from position allows scanners to match starting part-way through a string while still setting
811-
* onlyMatchAtStart and thus forcing the match to be at the specific starting position.
854+
* @param start The position within the string which the matcher should consider the "start" for \A, ^ and
855+
* onlyMatchAtStart. It is functionally equivalent to matching against a substring of string starting
856+
* at start, but also returning the correct offsets in the MatchData. The value can only be 0 or
857+
* equal to from. Setting this to the from position allows StringScanner to match starting part-way
858+
* through a string while still setting onlyMatchAtStart and thus forcing the match to start at the
859+
* specific starting position.
812860
*
813861
* @param createMatchData Whether to create a Ruby `MatchData` object with the results of the match or return a
814862
* simple Boolean value indicating a successful match (true: match; false: mismatch). */
863+
public abstract Object execute(RubyRegexp regexp, Object string, int from, int to, boolean onlyMatchAtStart,
864+
int start, boolean createMatchData);
865+
866+
@Specialization
867+
Object match(
868+
RubyRegexp regexp,
869+
Object string,
870+
int from,
871+
int to,
872+
boolean onlyMatchAtStart,
873+
int start,
874+
boolean createMatchData,
875+
@Cached LazyMatchInRegionJoniNode matchInRegionJoniNode,
876+
@Cached MatchInRegionTRegexNode matchInRegionTRegexNode,
877+
@Cached LazyDispatchNode callCompareEngines) {
878+
if (getContext().getOptions().COMPARE_REGEX_ENGINES) {
879+
return callCompareEngines.get(this).call(coreLibrary().truffleRegexpOperationsModule,
880+
"match_in_region_compare_engines",
881+
new Object[]{ regexp, string, from, to, onlyMatchAtStart, start, createMatchData });
882+
} else if (getContext().getOptions().USE_TRUFFLE_REGEX) {
883+
return matchInRegionTRegexNode.execute(regexp, string, from, to, onlyMatchAtStart, start,
884+
createMatchData);
885+
} else {
886+
return matchInRegionJoniNode.get(this).execute(regexp, string, from, to, onlyMatchAtStart, start,
887+
createMatchData);
888+
}
889+
}
890+
}
891+
892+
@Primitive(name = "regexp_match_in_region_joni", lowerFixnum = { 2, 3, 5 })
893+
public abstract static class MatchInRegionJoniNode extends PrimitiveArrayArgumentsNode {
894+
895+
@NeverDefault
896+
public static MatchInRegionJoniNode create() {
897+
return TruffleRegexpNodesFactory.MatchInRegionJoniNodeFactory.create(null);
898+
}
899+
900+
public abstract Object execute(RubyRegexp regexp, Object string, int from, int to, boolean onlyMatchAtStart,
901+
int start, boolean createMatchData);
902+
903+
/** See {@link MatchInRegionNode#execute} for parameters documentation */
815904
@Specialization
816905
static Object matchInRegion(
817906
RubyRegexp regexp,
@@ -862,17 +951,17 @@ static Object matchInRegion(
862951

863952
@GenerateCached(false)
864953
@GenerateInline
865-
public abstract static class LazyMatchInRegionNode extends RubyBaseNode {
954+
public abstract static class LazyMatchInRegionJoniNode extends RubyBaseNode {
866955

867-
public final MatchInRegionNode get(Node node) {
956+
public final MatchInRegionJoniNode get(Node node) {
868957
return execute(node);
869958
}
870959

871-
protected abstract MatchInRegionNode execute(Node node);
960+
protected abstract MatchInRegionJoniNode execute(Node node);
872961

873962
@Specialization
874-
static MatchInRegionNode doLazy(
875-
@Cached(inline = false) MatchInRegionNode matchInRegionNode) {
963+
static MatchInRegionJoniNode doLazy(
964+
@Cached(inline = false) MatchInRegionJoniNode matchInRegionNode) {
876965
return matchInRegionNode;
877966
}
878967
}
@@ -897,8 +986,15 @@ static TruffleString.SubstringByteIndexNode doLazy(
897986
@Primitive(name = "regexp_match_in_region_tregex", lowerFixnum = { 2, 3, 5 })
898987
public abstract static class MatchInRegionTRegexNode extends PrimitiveArrayArgumentsNode {
899988

989+
@NeverDefault
990+
public static MatchInRegionTRegexNode create() {
991+
return TruffleRegexpNodesFactory.MatchInRegionTRegexNodeFactory.create(null);
992+
}
993+
994+
public abstract Object execute(RubyRegexp regexp, Object string, int from, int to, boolean onlyMatchAtStart,
995+
int start, boolean createMatchData);
900996

901-
/** See {@link MatchInRegionNode#matchInRegion} for parameters documentation */
997+
/** See {@link MatchInRegionNode#execute} for parameters documentation */
902998
@Specialization
903999
static Object matchInRegionTRegex(
9041000
RubyRegexp regexp,
@@ -925,7 +1021,7 @@ static Object matchInRegionTRegex(
9251021
@Cached LazyDispatchNode warnOnFallbackNode,
9261022
@Cached LazyDispatchNode stringDupNode,
9271023
@Cached TranslateInteropExceptionNode translateInteropExceptionNode,
928-
@Cached LazyMatchInRegionNode fallbackMatchInRegionNode,
1024+
@Cached LazyMatchInRegionJoniNode fallbackMatchInRegionJoniNode,
9291025
@Cached LazyTruffleStringSubstringByteIndexNode substringByteIndexNode,
9301026
@Cached InlinedConditionProfile nonZeroStart,
9311027
@Cached FixupMatchDataStartNode fixupMatchDataStartNode,
@@ -953,7 +1049,7 @@ static Object matchInRegionTRegex(
9531049
start,
9541050
createMatchData,
9551051
warnOnFallbackNode,
956-
fallbackMatchInRegionNode.get(node));
1052+
fallbackMatchInRegionJoniNode.get(node));
9571053
}
9581054

9591055
if (getContext(node).getOptions().REGEXP_INSTRUMENT_MATCH) {
@@ -1013,8 +1109,8 @@ static Object matchInRegionTRegex(
10131109
profileAndReportLoopCount(node, loopProfile, groupCount);
10141110
}
10151111

1016-
var matchData = createMatchData(node, regexp, dupString(string, stringDupNode.get(node)), region,
1017-
result);
1112+
var dupedString = stringDupNode.get(node).call(string, "dup");
1113+
var matchData = createMatchData(node, regexp, dupedString, region, result);
10181114
if (nonZeroStart.profile(node, start != 0)) {
10191115
fixupMatchDataStartNode.execute(node, matchData, start);
10201116
}
@@ -1029,7 +1125,7 @@ static Object matchInRegionTRegex(
10291125

10301126
private static Object fallbackToJoni(Node node, RubyRegexp regexp, Object string, RubyEncoding encoding,
10311127
int from, int to, boolean onlyMatchAtStart, int start, boolean createMatchData,
1032-
LazyDispatchNode warnOnFallbackNode, MatchInRegionNode fallbackMatchInRegionNode) {
1128+
LazyDispatchNode warnOnFallbackNode, MatchInRegionJoniNode fallbackMatchInRegionJoniNode) {
10331129
if (getContext(node).getOptions().WARN_TRUFFLE_REGEX_MATCH_FALLBACK) {
10341130

10351131
warnOnFallbackNode.get(node).call(
@@ -1045,8 +1141,8 @@ private static Object fallbackToJoni(Node node, RubyRegexp regexp, Object string
10451141
start });
10461142
}
10471143

1048-
return fallbackMatchInRegionNode
1049-
.executeMatchInRegion(regexp, string, from, to, onlyMatchAtStart, start, createMatchData);
1144+
return fallbackMatchInRegionJoniNode
1145+
.execute(regexp, string, from, to, onlyMatchAtStart, start, createMatchData);
10501146
}
10511147

10521148
private static RubyMatchData createMatchData(Node node, RubyRegexp regexp, Object string, MultiRegion region,
@@ -1062,9 +1158,6 @@ private static RubyMatchData createMatchData(Node node, RubyRegexp regexp, Objec
10621158
return matchData;
10631159
}
10641160

1065-
private static Object dupString(Object string, DispatchNode stringDupNode) {
1066-
return stringDupNode.call(string, "dup");
1067-
}
10681161
}
10691162

10701163
@CoreMethod(names = "linear_time?", onSingleton = true, required = 1)
@@ -1091,15 +1184,12 @@ Object isLinearTime(RubyRegexp regexp,
10911184

10921185
public abstract static class MatchNode extends RubyBaseNode {
10931186

1094-
@Child private DispatchNode dupNode = DispatchNode.create();
1095-
10961187
public abstract Object execute(RubyRegexp regexp, Object string, Matcher matcher,
10971188
int from, int to, boolean onlyMatchAtStart, int start, boolean createMatchData);
10981189

10991190
// Creating a MatchData will store a copy of the source string. It's tempting to use a rope here, but a bit
11001191
// inconvenient because we can't work with ropes directly in Ruby and some MatchData methods are nicely
1101-
// implemented using the source string data. Likewise, we need to taint objects based on the source string's
1102-
// taint state. We mustn't allow the source string's contents to change, however, so we must ensure that we have
1192+
// implemented using the source string data. We mustn't allow the source string's contents to change, however, so we must ensure that we have
11031193
// a private copy of that string. Since the source string would otherwise be a reference to string held outside
11041194
// the MatchData object, it would be possible for the source string to be modified externally.
11051195
//
@@ -1117,6 +1207,7 @@ Object match(
11171207
boolean onlyMatchAtStart,
11181208
int start,
11191209
boolean createMatchData,
1210+
@Cached LazyDispatchNode stringDupNode,
11201211
@Cached InlinedConditionProfile createMatchDataProfile,
11211212
@Cached InlinedConditionProfile mismatchProfile,
11221213
@Cached InlinedConditionProfile nonZeroStart,
@@ -1142,7 +1233,7 @@ Object match(
11421233
final MultiRegion region = getMatcherEagerRegion(matcher);
11431234
assert assertValidRegion(region);
11441235

1145-
final RubyString dupedString = (RubyString) dupNode.call(string, "dup");
1236+
var dupedString = stringDupNode.get(this).call(string, "dup");
11461237
RubyMatchData result = new RubyMatchData(
11471238
coreLibrary().matchDataClass,
11481239
getLanguage().matchDataShape,

src/main/ruby/truffleruby/core/string.rb

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,7 @@ def self.try_convert(obj)
149149
def =~(pattern)
150150
case pattern
151151
when Regexp
152-
match_data = Truffle::RegexpOperations.search_region(pattern, self, 0, bytesize, true, true)
152+
match_data = Primitive.regexp_search(pattern, self, 0)
153153
Primitive.regexp_last_match_set(Primitive.caller_special_variables, match_data)
154154
return match_data.begin(0) if match_data
155155
when String
@@ -280,7 +280,7 @@ def partition(pattern = nil)
280280

281281
def rpartition(pattern)
282282
if Primitive.is_a?(pattern, Regexp)
283-
if m = Truffle::RegexpOperations.search_region(pattern, self, 0, bytesize, false, true)
283+
if m = Primitive.regexp_search_backward(pattern, self, bytesize)
284284
Primitive.regexp_last_match_set(Primitive.caller_special_variables, m)
285285
return [m.pre_match, m[0], m.post_match]
286286
end
@@ -1153,7 +1153,7 @@ def rindex(sub, finish = undefined)
11531153
if Primitive.is_a?(sub, Regexp)
11541154
Primitive.regexp_check_encoding(sub, self)
11551155

1156-
match_data = Truffle::RegexpOperations.search_region(sub, self, 0, byte_finish, false, true)
1156+
match_data = Primitive.regexp_search_backward(sub, self, byte_finish)
11571157
Primitive.regexp_last_match_set(Primitive.caller_special_variables, match_data)
11581158
return match_data.begin(0) if match_data
11591159

@@ -1224,7 +1224,7 @@ def byterindex(str, finish = bytesize)
12241224
if Primitive.is_a?(str, Regexp)
12251225
Primitive.regexp_check_encoding(str, self)
12261226

1227-
match = Truffle::RegexpOperations.search_region(str, self, 0, finish, false, true)
1227+
match = Primitive.regexp_search_backward(str, self, finish)
12281228
Primitive.regexp_last_match_set(Primitive.caller_special_variables, match)
12291229
return match ? Primitive.match_data_byte_begin(match, 0) : nil
12301230
end
@@ -1253,7 +1253,7 @@ def start_with?(*prefixes)
12531253
prefixes.each do |original_prefix|
12541254
case original_prefix
12551255
when Regexp
1256-
match_data = Truffle::RegexpOperations.match_in_region(original_prefix, self, 0, bytesize, true, 0)
1256+
match_data = Primitive.regexp_match_at_start(original_prefix, self, 0, 0)
12571257
Primitive.regexp_last_match_set(storage, match_data)
12581258
return true if match_data
12591259
else

src/main/ruby/truffleruby/core/symbol.rb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ def =~(pattern)
9595

9696
case pattern
9797
when Regexp
98-
match_data = Truffle::RegexpOperations.search_region(pattern, str, 0, str.bytesize, true, true)
98+
match_data = Primitive.regexp_search(pattern, str, 0)
9999
Primitive.regexp_last_match_set(Primitive.caller_special_variables, match_data)
100100
Primitive.match_data_byte_begin(match_data, 0) if match_data
101101
when String
@@ -146,7 +146,7 @@ def [](index, other = undefined)
146146
end
147147

148148
str = to_s
149-
match_data = Truffle::RegexpOperations.search_region(index, str, 0, str.bytesize, true, true)
149+
match_data = Primitive.regexp_search(index, str, 0)
150150
Primitive.regexp_last_match_set(Primitive.caller_special_variables, match_data)
151151
if match_data
152152
result = match_data.to_s

0 commit comments

Comments
 (0)