16
16
import java .util .concurrent .ConcurrentHashMap ;
17
17
import java .util .concurrent .atomic .AtomicInteger ;
18
18
19
- import com .oracle .truffle .api .CompilerDirectives ;
20
19
import com .oracle .truffle .api .library .CachedLibrary ;
21
20
import com .oracle .truffle .api .profiles .BranchProfile ;
22
21
import com .oracle .truffle .api .source .Source ;
41
40
import org .truffleruby .core .array .ArrayBuilderNode ;
42
41
import org .truffleruby .core .array .ArrayBuilderNode .BuilderState ;
43
42
import org .truffleruby .core .array .RubyArray ;
43
+ import org .truffleruby .core .encoding .EncodingNodes ;
44
44
import org .truffleruby .core .encoding .RubyEncoding ;
45
45
import org .truffleruby .core .kernel .KernelNodes .SameOrEqualNode ;
46
46
import org .truffleruby .core .regexp .RegexpNodes .ToSNode ;
75
75
@ CoreModule ("Truffle::RegexpOperations" )
76
76
public class TruffleRegexpNodes {
77
77
78
- @ TruffleBoundary
79
- public static Matcher createMatcher ( RubyContext context , RubyRegexp regexp , Rope stringRope , byte [] stringBytes ,
80
- boolean encodingConversion , int start , Node currentNode ) {
81
- final Encoding enc = checkEncoding ( regexp , stringRope . getEncoding (), stringRope . getCodeRange () );
82
- Regex regex = regexp . regex ;
78
+ // rb_reg_prepare_enc ... mostly. Some of the error checks are performed by callers of this method.
79
+ public abstract static class CheckEncodingNode extends RubyContextNode {
80
+
81
+ @ Child RopeNodes . CodeRangeNode codeRangeNode = RopeNodes . CodeRangeNode . create ( );
82
+ @ Child RubyStringLibrary stringLibrary = RubyStringLibrary . getFactory (). createDispatched ( 2 ) ;
83
83
84
- if (encodingConversion && regex .getEncoding () != enc ) {
85
- EncodingCache encodingCache = regexp .cachedEncodings ;
86
- regex = encodingCache .getOrCreate (enc , e -> makeRegexpForEncoding (context , regexp , e , currentNode ));
84
+ public static CheckEncodingNode create () {
85
+ return TruffleRegexpNodesFactory .CheckEncodingNodeGen .create ();
87
86
}
88
87
89
- return regex .matcher (stringBytes , start , stringBytes .length );
90
- }
88
+ public final Encoding executeCheckEncoding (RubyRegexp regexp , Object string ) {
89
+ return executeInternal (regexp , stringLibrary .getRope (string ));
90
+ }
91
91
92
- @ TruffleBoundary
93
- public static Encoding checkEncoding (RubyRegexp regexp , Encoding strEnc , CodeRange codeRange ) {
94
- final Encoding regexEnc = regexp .regex .getEncoding ();
92
+ public abstract Encoding executeInternal (RubyRegexp regexp , Rope rope );
93
+
94
+ @ Specialization (guards = {
95
+ "!isSameEncoding(regexp, rope)" ,
96
+ "isUSASCII(regexp, rope)"
97
+ })
98
+ protected Encoding checkEncodingAsciiOnly (RubyRegexp regexp , Rope rope ) {
99
+ return USASCIIEncoding .INSTANCE ;
100
+ }
95
101
96
- if (strEnc == regexEnc ) {
97
- return regexEnc ;
98
- } else if (regexEnc == USASCIIEncoding .INSTANCE && codeRange == CodeRange .CR_7BIT ) {
99
- return regexEnc ;
100
- } else if (strEnc .isAsciiCompatible () && regexp .options .isFixed ()) {
101
- return regexEnc ;
102
+ @ Specialization (guards = {
103
+ "isSameEncoding(regexp, rope)"
104
+ })
105
+ protected Encoding checkEncodingSameEncoding (RubyRegexp regexp , Rope rope ) {
106
+ return regexp .regex .getEncoding ();
102
107
}
103
- return strEnc ;
108
+
109
+ @ Specialization (guards = {
110
+ "!isSameEncoding(regexp, rope)" ,
111
+ "!isUSASCII(regexp, rope)" ,
112
+ "isFixedEncoding(regexp, rope)" ,
113
+ })
114
+ protected Encoding checkEncodingFixedEncoding (RubyRegexp regexp , Rope rope ) {
115
+ return regexp .regex .getEncoding ();
116
+ }
117
+
118
+ @ Specialization (guards = {
119
+ "!isSameEncoding(regexp, rope)" ,
120
+ "!isUSASCII(regexp, rope)" ,
121
+ "!isFixedEncoding(regexp, rope)"
122
+ })
123
+ protected Encoding fallback (RubyRegexp regexp , Rope rope ) {
124
+ return rope .encoding ;
125
+ }
126
+
127
+ protected boolean isSameEncoding (RubyRegexp regexp , Rope rope ) {
128
+ return regexp .regex .getEncoding () == rope .encoding ;
129
+ }
130
+
131
+ protected boolean isUSASCII (RubyRegexp regexp , Rope rope ) {
132
+ return regexp .regex .getEncoding () == USASCIIEncoding .INSTANCE &&
133
+ codeRangeNode .execute (rope ) == CodeRange .CR_7BIT ;
134
+ }
135
+
136
+ protected boolean isFixedEncoding (RubyRegexp regexp , Rope rope ) {
137
+ return regexp .options .isFixed () && rope .encoding .isAsciiCompatible ();
138
+ }
139
+
140
+ }
141
+
142
+ @ TruffleBoundary
143
+ private static Matcher getMatcher (Regex regex , byte [] stringBytes , int start ) {
144
+ return regex .matcher (stringBytes , start , stringBytes .length );
104
145
}
105
146
147
+ @ TruffleBoundary
106
148
private static Regex makeRegexpForEncoding (RubyContext context , RubyRegexp regexp , Encoding enc , Node currentNode ) {
107
149
final Encoding [] fixedEnc = new Encoding []{ null };
108
150
final Rope sourceRope = regexp .source ;
@@ -197,26 +239,16 @@ public RubyRegexp createRegexp(Rope pattern) throws DeferredRaiseException {
197
239
}
198
240
}
199
241
200
- @ CoreMethod (names = "select_encoding" , onSingleton = true , required = 3 )
242
+ @ CoreMethod (names = "select_encoding" , onSingleton = true , required = 2 )
201
243
public abstract static class SelectEncodingNode extends CoreMethodArrayArgumentsNode {
202
244
203
- @ Child RopeNodes .CodeRangeNode codeRangeNode ;
204
-
205
245
@ Specialization (guards = "libString.isRubyString(str)" )
206
- protected RubyEncoding selectEncoding (RubyRegexp re , Object str , boolean encodingConversion ,
246
+ protected RubyEncoding selectEncoding (RubyRegexp re , Object str ,
247
+ @ Cached EncodingNodes .GetRubyEncodingNode getRubyEncodingNode ,
248
+ @ Cached CheckEncodingNode checkEncodingNode ,
207
249
@ CachedLibrary (limit = "2" ) RubyStringLibrary libString ) {
208
- Encoding encoding ;
209
- if (encodingConversion ) {
210
- Rope stringRope = libString .getRope (str );
211
- if (codeRangeNode == null ) {
212
- CompilerDirectives .transferToInterpreterAndInvalidate ();
213
- codeRangeNode = insert (RopeNodes .CodeRangeNode .create ());
214
- }
215
- encoding = checkEncoding (re , stringRope .getEncoding (), codeRangeNode .execute (stringRope ));
216
- } else {
217
- encoding = re .regex .getEncoding ();
218
- }
219
- return getContext ().getEncodingManager ().getRubyEncoding (encoding );
250
+ final Encoding encoding = checkEncodingNode .executeCheckEncoding (re , str );
251
+ return getRubyEncodingNode .executeGetRubyEncoding (encoding );
220
252
}
221
253
}
222
254
@@ -359,7 +391,7 @@ protected boolean initialized(RubyRegexp regexp) {
359
391
}
360
392
}
361
393
362
- @ Primitive (name = "regexp_match_in_region" , lowerFixnum = { 2 , 3 , 6 })
394
+ @ Primitive (name = "regexp_match_in_region" , lowerFixnum = { 2 , 3 , 5 })
363
395
public abstract static class MatchInRegionNode extends PrimitiveArrayArgumentsNode {
364
396
365
397
/** Matches a regular expression against a string over the specified range of characters.
@@ -375,32 +407,27 @@ public abstract static class MatchInRegionNode extends PrimitiveArrayArgumentsNo
375
407
* @param atStart Whether to only match at the beginning of the string, if false then the regexp can have any
376
408
* amount of prematch.
377
409
*
378
- * @param encodingConversion Whether to attempt encoding conversion of the regexp to match the string
379
- *
380
410
* @param startPos The position within the string which the matcher should consider the start. Setting this to
381
411
* the from position allows scanners to match starting partway through a string while still setting
382
412
* atStart and thus forcing the match to be at the specific starting position. */
383
413
@ Specialization (guards = "libString.isRubyString(string)" )
384
414
protected Object matchInRegion (
385
- RubyRegexp regexp ,
386
- Object string ,
387
- int fromPos ,
388
- int toPos ,
389
- boolean atStart ,
390
- boolean encodingConversion ,
391
- int startPos ,
415
+ RubyRegexp regexp , Object string , int fromPos , int toPos , boolean atStart , int startPos ,
416
+ @ Cached ConditionProfile encodingMismatchProfile ,
392
417
@ Cached RopeNodes .BytesNode bytesNode ,
393
- @ Cached TruffleRegexpNodes .MatchNode matchNode ,
418
+ @ Cached MatchNode matchNode ,
419
+ @ Cached CheckEncodingNode checkEncodingNode ,
394
420
@ CachedLibrary (limit = "2" ) RubyStringLibrary libString ) {
395
- Rope rope = libString .getRope (string );
396
- Matcher matcher = createMatcher (
397
- getContext (),
398
- regexp ,
399
- rope ,
400
- bytesNode .execute (rope ),
401
- encodingConversion ,
402
- startPos ,
403
- this );
421
+ final Rope rope = libString .getRope (string );
422
+ final Encoding enc = checkEncodingNode .executeCheckEncoding (regexp , string );
423
+ Regex regex = regexp .regex ;
424
+
425
+ if (encodingMismatchProfile .profile (regex .getEncoding () != enc )) {
426
+ final EncodingCache encodingCache = regexp .cachedEncodings ;
427
+ regex = encodingCache .getOrCreate (enc , e -> makeRegexpForEncoding (getContext (), regexp , e , this ));
428
+ }
429
+
430
+ final Matcher matcher = getMatcher (regex , bytesNode .execute (rope ), startPos );
404
431
return matchNode .execute (regexp , string , matcher , fromPos , toPos , atStart );
405
432
}
406
433
}
0 commit comments