@@ -782,17 +782,62 @@ private <K, V> void buildAndSetDistributionHash(HashStoreLibrary hashStoreLibrar
782
782
}
783
783
}
784
784
785
- @ Primitive (name = "regexp_match_in_region" , lowerFixnum = { 2 , 3 , 5 })
786
- public abstract static class MatchInRegionNode extends PrimitiveArrayArgumentsNode {
785
+ @ Primitive (name = "regexp_search" , lowerFixnum = 2 )
786
+ public abstract static class RegexpSearchNode extends PrimitiveArrayArgumentsNode {
787
+ @ Specialization
788
+ Object match (RubyRegexp regexp , Object string , int from ,
789
+ @ Cached RubyStringLibrary libString ,
790
+ @ Cached MatchInRegionNode matchInRegionNode ) {
791
+ int to = libString .byteLength (this , string );
792
+ return matchInRegionNode .execute (regexp , string , from , to , false , 0 , true );
793
+ }
794
+ }
787
795
788
- @ NeverDefault
789
- public static MatchInRegionNode create () {
790
- return TruffleRegexpNodesFactory .MatchInRegionNodeFactory .create (null );
796
+ @ Primitive (name = "regexp_search?" , lowerFixnum = 2 )
797
+ public abstract static class RegexpSearchPredicateNode extends PrimitiveArrayArgumentsNode {
798
+ @ Specialization
799
+ boolean match (RubyRegexp regexp , Object string , int from ,
800
+ @ Cached RubyStringLibrary libString ,
801
+ @ Cached MatchInRegionNode matchInRegionNode ) {
802
+ int to = libString .byteLength (this , string );
803
+ return (boolean ) matchInRegionNode .execute (regexp , string , from , to , false , 0 , false );
804
+ }
805
+ }
806
+
807
+ @ Primitive (name = "regexp_search_backward" , lowerFixnum = 2 )
808
+ public abstract static class RegexpSearchBackwardNode extends PrimitiveArrayArgumentsNode {
809
+ @ Specialization
810
+ Object match (RubyRegexp regexp , Object string , int end ,
811
+ @ Cached MatchInRegionNode matchInRegionNode ) {
812
+ return matchInRegionNode .execute (regexp , string , end , 0 , false , 0 , true );
813
+ }
814
+ }
815
+
816
+ @ Primitive (name = "regexp_search_with_start" , lowerFixnum = { 2 , 3 }, isPublic = true )
817
+ public abstract static class RegexpSearchWithStartNode extends PrimitiveArrayArgumentsNode {
818
+ @ Specialization
819
+ Object match (RubyRegexp regexp , Object string , int from , int start ,
820
+ @ Cached RubyStringLibrary libString ,
821
+ @ Cached MatchInRegionNode matchInRegionNode ) {
822
+ int to = libString .byteLength (this , string );
823
+ return matchInRegionNode .execute (regexp , string , from , to , false , start , true );
791
824
}
825
+ }
792
826
793
- public abstract Object executeMatchInRegion (RubyRegexp regexp , Object string , int from , int to ,
794
- boolean onlyMatchAtStart , int start , boolean createMatchData );
827
+ @ Primitive (name = "regexp_match_at_start" , lowerFixnum = { 2 , 3 }, isPublic = true )
828
+ public abstract static class RegexpMatchAtStartNode extends PrimitiveArrayArgumentsNode {
829
+ @ Specialization
830
+ Object match (RubyRegexp regexp , Object string , int from , int start ,
831
+ @ Cached RubyStringLibrary libString ,
832
+ @ Cached MatchInRegionNode matchInRegionNode ) {
833
+ int to = libString .byteLength (this , string );
834
+ return matchInRegionNode .execute (regexp , string , from , to , true , start , true );
835
+ }
836
+ }
795
837
838
+ /** This node has many complicated arguments, so instead of exposing it as a Primitive we expose multiple simpler
839
+ * Primitives. */
840
+ public abstract static class MatchInRegionNode extends RubyBaseNode {
796
841
/** Matches a regular expression against a string over the specified range of characters.
797
842
*
798
843
* @param regexp The regexp to match
@@ -806,12 +851,56 @@ public abstract Object executeMatchInRegion(RubyRegexp regexp, Object string, in
806
851
* @param onlyMatchAtStart Whether to only match at the beginning of the string, if false then the regexp can
807
852
* have any amount of prematch.
808
853
*
809
- * @param start The position within the string which the matcher should consider the start. Setting this to the
810
- * from position allows scanners to match starting part-way through a string while still setting
811
- * onlyMatchAtStart and thus forcing the match to be at the specific starting position.
854
+ * @param start The position within the string which the matcher should consider the "start" for \A, ^ and
855
+ * onlyMatchAtStart. It is functionally equivalent to matching against a substring of string starting
856
+ * at start, but also returning the correct offsets in the MatchData. The value can only be 0 or
857
+ * equal to from. Setting this to the from position allows StringScanner to match starting part-way
858
+ * through a string while still setting onlyMatchAtStart and thus forcing the match to start at the
859
+ * specific starting position.
812
860
*
813
861
* @param createMatchData Whether to create a Ruby `MatchData` object with the results of the match or return a
814
862
* simple Boolean value indicating a successful match (true: match; false: mismatch). */
863
+ public abstract Object execute (RubyRegexp regexp , Object string , int from , int to , boolean onlyMatchAtStart ,
864
+ int start , boolean createMatchData );
865
+
866
+ @ Specialization
867
+ Object match (
868
+ RubyRegexp regexp ,
869
+ Object string ,
870
+ int from ,
871
+ int to ,
872
+ boolean onlyMatchAtStart ,
873
+ int start ,
874
+ boolean createMatchData ,
875
+ @ Cached LazyMatchInRegionJoniNode matchInRegionJoniNode ,
876
+ @ Cached MatchInRegionTRegexNode matchInRegionTRegexNode ,
877
+ @ Cached LazyDispatchNode callCompareEngines ) {
878
+ if (getContext ().getOptions ().COMPARE_REGEX_ENGINES ) {
879
+ return callCompareEngines .get (this ).call (coreLibrary ().truffleRegexpOperationsModule ,
880
+ "match_in_region_compare_engines" ,
881
+ new Object []{ regexp , string , from , to , onlyMatchAtStart , start , createMatchData });
882
+ } else if (getContext ().getOptions ().USE_TRUFFLE_REGEX ) {
883
+ return matchInRegionTRegexNode .execute (regexp , string , from , to , onlyMatchAtStart , start ,
884
+ createMatchData );
885
+ } else {
886
+ return matchInRegionJoniNode .get (this ).execute (regexp , string , from , to , onlyMatchAtStart , start ,
887
+ createMatchData );
888
+ }
889
+ }
890
+ }
891
+
892
+ @ Primitive (name = "regexp_match_in_region_joni" , lowerFixnum = { 2 , 3 , 5 })
893
+ public abstract static class MatchInRegionJoniNode extends PrimitiveArrayArgumentsNode {
894
+
895
+ @ NeverDefault
896
+ public static MatchInRegionJoniNode create () {
897
+ return TruffleRegexpNodesFactory .MatchInRegionJoniNodeFactory .create (null );
898
+ }
899
+
900
+ public abstract Object execute (RubyRegexp regexp , Object string , int from , int to , boolean onlyMatchAtStart ,
901
+ int start , boolean createMatchData );
902
+
903
+ /** See {@link MatchInRegionNode#match} for parameters documentation */
815
904
@ Specialization
816
905
static Object matchInRegion (
817
906
RubyRegexp regexp ,
@@ -862,17 +951,17 @@ static Object matchInRegion(
862
951
863
952
@ GenerateCached (false )
864
953
@ GenerateInline
865
- public abstract static class LazyMatchInRegionNode extends RubyBaseNode {
954
+ public abstract static class LazyMatchInRegionJoniNode extends RubyBaseNode {
866
955
867
- public final MatchInRegionNode get (Node node ) {
956
+ public final MatchInRegionJoniNode get (Node node ) {
868
957
return execute (node );
869
958
}
870
959
871
- protected abstract MatchInRegionNode execute (Node node );
960
+ protected abstract MatchInRegionJoniNode execute (Node node );
872
961
873
962
@ Specialization
874
- static MatchInRegionNode doLazy (
875
- @ Cached (inline = false ) MatchInRegionNode matchInRegionNode ) {
963
+ static MatchInRegionJoniNode doLazy (
964
+ @ Cached (inline = false ) MatchInRegionJoniNode matchInRegionNode ) {
876
965
return matchInRegionNode ;
877
966
}
878
967
}
@@ -897,8 +986,15 @@ static TruffleString.SubstringByteIndexNode doLazy(
897
986
@ Primitive (name = "regexp_match_in_region_tregex" , lowerFixnum = { 2 , 3 , 5 })
898
987
public abstract static class MatchInRegionTRegexNode extends PrimitiveArrayArgumentsNode {
899
988
989
+ @ NeverDefault
990
+ public static MatchInRegionTRegexNode create () {
991
+ return TruffleRegexpNodesFactory .MatchInRegionTRegexNodeFactory .create (null );
992
+ }
993
+
994
+ public abstract Object execute (RubyRegexp regexp , Object string , int from , int to , boolean onlyMatchAtStart ,
995
+ int start , boolean createMatchData );
900
996
901
- /** See {@link MatchInRegionNode#matchInRegion } for parameters documentation */
997
+ /** See {@link MatchInRegionNode#match } for parameters documentation */
902
998
@ Specialization
903
999
static Object matchInRegionTRegex (
904
1000
RubyRegexp regexp ,
@@ -925,7 +1021,7 @@ static Object matchInRegionTRegex(
925
1021
@ Cached LazyDispatchNode warnOnFallbackNode ,
926
1022
@ Cached LazyDispatchNode stringDupNode ,
927
1023
@ Cached TranslateInteropExceptionNode translateInteropExceptionNode ,
928
- @ Cached LazyMatchInRegionNode fallbackMatchInRegionNode ,
1024
+ @ Cached LazyMatchInRegionJoniNode fallbackMatchInRegionJoniNode ,
929
1025
@ Cached LazyTruffleStringSubstringByteIndexNode substringByteIndexNode ,
930
1026
@ Cached InlinedConditionProfile nonZeroStart ,
931
1027
@ Cached FixupMatchDataStartNode fixupMatchDataStartNode ,
@@ -953,7 +1049,7 @@ static Object matchInRegionTRegex(
953
1049
start ,
954
1050
createMatchData ,
955
1051
warnOnFallbackNode ,
956
- fallbackMatchInRegionNode .get (node ));
1052
+ fallbackMatchInRegionJoniNode .get (node ));
957
1053
}
958
1054
959
1055
if (getContext (node ).getOptions ().REGEXP_INSTRUMENT_MATCH ) {
@@ -1013,8 +1109,8 @@ static Object matchInRegionTRegex(
1013
1109
profileAndReportLoopCount (node , loopProfile , groupCount );
1014
1110
}
1015
1111
1016
- var matchData = createMatchData ( node , regexp , dupString ( string , stringDupNode .get (node )), region ,
1017
- result );
1112
+ var dupedString = stringDupNode .get (node ). call ( string , "dup" );
1113
+ var matchData = createMatchData ( node , regexp , dupedString , region , result );
1018
1114
if (nonZeroStart .profile (node , start != 0 )) {
1019
1115
fixupMatchDataStartNode .execute (node , matchData , start );
1020
1116
}
@@ -1029,7 +1125,7 @@ static Object matchInRegionTRegex(
1029
1125
1030
1126
private static Object fallbackToJoni (Node node , RubyRegexp regexp , Object string , RubyEncoding encoding ,
1031
1127
int from , int to , boolean onlyMatchAtStart , int start , boolean createMatchData ,
1032
- LazyDispatchNode warnOnFallbackNode , MatchInRegionNode fallbackMatchInRegionNode ) {
1128
+ LazyDispatchNode warnOnFallbackNode , MatchInRegionJoniNode fallbackMatchInRegionJoniNode ) {
1033
1129
if (getContext (node ).getOptions ().WARN_TRUFFLE_REGEX_MATCH_FALLBACK ) {
1034
1130
1035
1131
warnOnFallbackNode .get (node ).call (
@@ -1045,8 +1141,8 @@ private static Object fallbackToJoni(Node node, RubyRegexp regexp, Object string
1045
1141
start });
1046
1142
}
1047
1143
1048
- return fallbackMatchInRegionNode
1049
- .executeMatchInRegion (regexp , string , from , to , onlyMatchAtStart , start , createMatchData );
1144
+ return fallbackMatchInRegionJoniNode
1145
+ .execute (regexp , string , from , to , onlyMatchAtStart , start , createMatchData );
1050
1146
}
1051
1147
1052
1148
private static RubyMatchData createMatchData (Node node , RubyRegexp regexp , Object string , MultiRegion region ,
@@ -1062,9 +1158,6 @@ private static RubyMatchData createMatchData(Node node, RubyRegexp regexp, Objec
1062
1158
return matchData ;
1063
1159
}
1064
1160
1065
- private static Object dupString (Object string , DispatchNode stringDupNode ) {
1066
- return stringDupNode .call (string , "dup" );
1067
- }
1068
1161
}
1069
1162
1070
1163
@ CoreMethod (names = "linear_time?" , onSingleton = true , required = 1 )
@@ -1091,15 +1184,12 @@ Object isLinearTime(RubyRegexp regexp,
1091
1184
1092
1185
public abstract static class MatchNode extends RubyBaseNode {
1093
1186
1094
- @ Child private DispatchNode dupNode = DispatchNode .create ();
1095
-
1096
1187
public abstract Object execute (RubyRegexp regexp , Object string , Matcher matcher ,
1097
1188
int from , int to , boolean onlyMatchAtStart , int start , boolean createMatchData );
1098
1189
1099
1190
// Creating a MatchData will store a copy of the source string. It's tempting to use a rope here, but a bit
1100
1191
// inconvenient because we can't work with ropes directly in Ruby and some MatchData methods are nicely
1101
- // implemented using the source string data. Likewise, we need to taint objects based on the source string's
1102
- // taint state. We mustn't allow the source string's contents to change, however, so we must ensure that we have
1192
+ // implemented using the source string data. We mustn't allow the source string's contents to change, however, so we must ensure that we have
1103
1193
// a private copy of that string. Since the source string would otherwise be a reference to string held outside
1104
1194
// the MatchData object, it would be possible for the source string to be modified externally.
1105
1195
//
@@ -1117,6 +1207,7 @@ Object match(
1117
1207
boolean onlyMatchAtStart ,
1118
1208
int start ,
1119
1209
boolean createMatchData ,
1210
+ @ Cached LazyDispatchNode stringDupNode ,
1120
1211
@ Cached InlinedConditionProfile createMatchDataProfile ,
1121
1212
@ Cached InlinedConditionProfile mismatchProfile ,
1122
1213
@ Cached InlinedConditionProfile nonZeroStart ,
@@ -1142,7 +1233,7 @@ Object match(
1142
1233
final MultiRegion region = getMatcherEagerRegion (matcher );
1143
1234
assert assertValidRegion (region );
1144
1235
1145
- final RubyString dupedString = ( RubyString ) dupNode .call (string , "dup" );
1236
+ var dupedString = stringDupNode . get ( this ) .call (string , "dup" );
1146
1237
RubyMatchData result = new RubyMatchData (
1147
1238
coreLibrary ().matchDataClass ,
1148
1239
getLanguage ().matchDataShape ,
0 commit comments