47
47
import org .jcodings .Encoding ;
48
48
import org .jcodings .specific .ASCIIEncoding ;
49
49
import org .jcodings .specific .USASCIIEncoding ;
50
- import org .jcodings .specific .UTF8Encoding ;
51
50
import org .joni .NameEntry ;
52
51
import org .joni .Option ;
53
52
import org .joni .Regex ;
61
60
import org .truffleruby .core .rope .Rope ;
62
61
import org .truffleruby .core .rope .RopeBuilder ;
63
62
import org .truffleruby .core .rope .RopeOperations ;
63
+ import org .truffleruby .core .rope .RopeWithEncoding ;
64
64
import org .truffleruby .core .string .StringSupport ;
65
65
import org .truffleruby .core .string .StringUtils ;
66
66
import org .truffleruby .language .backtrace .BacktraceFormatter ;
@@ -88,14 +88,14 @@ public Encoding getEncoding() {
88
88
89
89
public static Regex makeRegexp (RubyContext context , RubyDeferredWarnings rubyDeferredWarnings ,
90
90
RopeBuilder processedSource , RegexpOptions options ,
91
- Encoding enc , Rope source , Node currentNode ) throws DeferredRaiseException {
91
+ RubyEncoding enc , Rope source , Node currentNode ) throws DeferredRaiseException {
92
92
try {
93
93
return new Regex (
94
94
processedSource .getUnsafeBytes (),
95
95
0 ,
96
96
processedSource .getLength (),
97
97
options .toJoniOptions (),
98
- enc ,
98
+ enc . jcoding ,
99
99
Syntax .RUBY ,
100
100
rubyDeferredWarnings == null
101
101
? new RegexWarnCallback (context )
@@ -107,10 +107,17 @@ public static Regex makeRegexp(RubyContext context, RubyDeferredWarnings rubyDef
107
107
}
108
108
}
109
109
110
- private static Regex getRegexpFromCache (RubyContext context , RopeBuilder bytes , Encoding encoding ,
110
+ private static Regex getRegexpFromCache (RubyContext context , RopeBuilder bytes , RubyEncoding encoding ,
111
111
RegexpOptions options , Rope source ) throws DeferredRaiseException {
112
112
if (context == null ) {
113
- final Regex regex = makeRegexp (null , null , bytes , options , encoding , source , null );
113
+ final Regex regex = makeRegexp (
114
+ null ,
115
+ null ,
116
+ bytes ,
117
+ options ,
118
+ encoding ,
119
+ source ,
120
+ null );
114
121
regex .setUserObject (bytes );
115
122
return regex ;
116
123
}
@@ -133,37 +140,47 @@ private static Regex getRegexpFromCache(RubyContext context, RopeBuilder bytes,
133
140
}
134
141
}
135
142
136
- public ClassicRegexp (RubyContext context , Rope str , RegexpOptions originalOptions ) throws DeferredRaiseException {
143
+ public ClassicRegexp (RubyContext context , Rope str , RubyEncoding enc , RegexpOptions originalOptions )
144
+ throws DeferredRaiseException {
137
145
this .context = context ;
138
146
this .options = (RegexpOptions ) originalOptions .clone ();
139
147
140
- Encoding enc = str .getEncoding ();
141
- if (enc .isDummy ()) {
148
+ if (enc .jcoding .isDummy ()) {
142
149
throw new UnsupportedOperationException ("can't make regexp with dummy encoding" );
143
150
}
144
151
145
- Encoding [] fixedEnc = new Encoding []{ null };
152
+ RubyEncoding [] fixedEnc = new RubyEncoding []{ null };
146
153
RopeBuilder unescaped = preprocess (str , enc , fixedEnc , RegexpSupport .ErrorMode .RAISE );
147
- enc = computeRegexpEncoding (options , enc , fixedEnc );
148
-
149
- this .pattern = getRegexpFromCache (context , unescaped , enc , options , str );
154
+ final RubyEncoding computedEnc = computeRegexpEncoding (options , enc , fixedEnc );
155
+ this .pattern = getRegexpFromCache (
156
+ context ,
157
+ unescaped ,
158
+ computedEnc ,
159
+ options ,
160
+ RopeOperations .withEncoding (str , computedEnc .jcoding ));
150
161
this .str = str ;
151
162
}
152
163
153
164
@ TruffleBoundary
154
165
@ SuppressWarnings ("fallthrough" )
155
- private static boolean unescapeNonAscii (RopeBuilder to , Rope str , Encoding enc ,
156
- Encoding [] encp , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
166
+ private static boolean unescapeNonAscii (RopeBuilder to , Rope str , RubyEncoding enc ,
167
+ RubyEncoding [] encp , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
157
168
boolean hasProperty = false ;
158
169
byte [] buf = null ;
159
170
160
171
int p = 0 ;
161
172
int end = str .byteLength ();
162
173
final byte [] bytes = str .getBytes ();
163
174
175
+
164
176
while (p < end ) {
165
177
final int cl = StringSupport
166
- .characterLength (enc , enc == str .getEncoding () ? str .getCodeRange () : CR_UNKNOWN , bytes , p , end );
178
+ .characterLength (
179
+ enc .jcoding ,
180
+ enc .jcoding == str .getEncoding () ? str .getCodeRange () : CR_UNKNOWN ,
181
+ bytes ,
182
+ p ,
183
+ end );
167
184
if (cl <= 0 ) {
168
185
raisePreprocessError (str , "invalid multibyte character" , mode );
169
186
}
@@ -208,7 +225,7 @@ private static boolean unescapeNonAscii(RopeBuilder to, Rope str, Encoding enc,
208
225
case 'C' : /* \C-X, \C-\M-X */
209
226
case 'M' : /* \M-X, \M-\C-X, \M-\cX */
210
227
p -= 2 ;
211
- if (enc == USASCIIEncoding . INSTANCE ) {
228
+ if (enc == Encodings . US_ASCII ) {
212
229
if (buf == null ) {
213
230
buf = new byte [1 ];
214
231
}
@@ -269,7 +286,7 @@ private static boolean unescapeNonAscii(RopeBuilder to, Rope str, Encoding enc,
269
286
}
270
287
271
288
private static int unescapeUnicodeBmp (RopeBuilder to , byte [] bytes , int p , int end ,
272
- Encoding [] encp , Rope str , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
289
+ RubyEncoding [] encp , Rope str , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
273
290
if (p + 4 > end ) {
274
291
raisePreprocessError (str , "invalid Unicode escape" , mode );
275
292
}
@@ -283,7 +300,7 @@ private static int unescapeUnicodeBmp(RopeBuilder to, byte[] bytes, int p, int e
283
300
}
284
301
285
302
private static int unescapeUnicodeList (RopeBuilder to , byte [] bytes , int p , int end ,
286
- Encoding [] encp , Rope str , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
303
+ RubyEncoding [] encp , Rope str , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
287
304
while (p < end && ASCIIEncoding .INSTANCE .isSpace (bytes [p ] & 0xff )) {
288
305
p ++;
289
306
}
@@ -314,7 +331,7 @@ private static int unescapeUnicodeList(RopeBuilder to, byte[] bytes, int p, int
314
331
return p ;
315
332
}
316
333
317
- private static void appendUtf8 (RopeBuilder to , int code , Encoding [] enc , Rope str ,
334
+ private static void appendUtf8 (RopeBuilder to , int code , RubyEncoding [] enc , Rope str ,
318
335
RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
319
336
checkUnicodeRange (code , str , mode );
320
337
@@ -328,8 +345,8 @@ private static void appendUtf8(RopeBuilder to, int code, Encoding[] enc, Rope st
328
345
to .setLength (to .getLength () + utf8Decode (to .getUnsafeBytes (), to .getLength (), code ));
329
346
}
330
347
if (enc [0 ] == null ) {
331
- enc [0 ] = UTF8Encoding . INSTANCE ;
332
- } else if (!(enc [0 ].isUTF8 ())) {
348
+ enc [0 ] = Encodings . UTF_8 ;
349
+ } else if (!(enc [0 ].jcoding . isUTF8 ())) {
333
350
raisePreprocessError (str , "UTF-8 character in non UTF-8 regexp" , mode );
334
351
}
335
352
}
@@ -381,17 +398,19 @@ private static void checkUnicodeRange(int code, Rope str, RegexpSupport.ErrorMod
381
398
}
382
399
383
400
private static int unescapeEscapedNonAscii (RopeBuilder to , byte [] bytes , int p , int end ,
384
- Encoding enc , Encoding [] encp , Rope str , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
385
- byte [] chBuf = new byte [enc .maxLength ()];
401
+ RubyEncoding enc , RubyEncoding [] encp , Rope str , RegexpSupport .ErrorMode mode )
402
+ throws DeferredRaiseException {
403
+ byte [] chBuf = new byte [enc .jcoding .maxLength ()];
386
404
int chLen = 0 ;
387
405
388
406
p = readEscapedByte (chBuf , chLen ++, bytes , p , end , str , mode );
389
- while (chLen < enc .maxLength () &&
390
- StringSupport .MBCLEN_NEEDMORE_P (StringSupport .characterLength (enc , CR_UNKNOWN , chBuf , 0 , chLen ))) {
407
+ while (chLen < enc .jcoding .maxLength () &&
408
+ StringSupport
409
+ .MBCLEN_NEEDMORE_P (StringSupport .characterLength (enc .jcoding , CR_UNKNOWN , chBuf , 0 , chLen ))) {
391
410
p = readEscapedByte (chBuf , chLen ++, bytes , p , end , str , mode );
392
411
}
393
412
394
- int cl = StringSupport .characterLength (enc , CR_UNKNOWN , chBuf , 0 , chLen );
413
+ int cl = StringSupport .characterLength (enc . jcoding , CR_UNKNOWN , chBuf , 0 , chLen );
395
414
if (cl == -1 ) {
396
415
raisePreprocessError (str , "invalid multibyte escape" , mode ); // MBCLEN_INVALID_P
397
416
}
@@ -551,86 +570,94 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e
551
570
} // while
552
571
}
553
572
554
- public static void preprocessCheck (Rope bytes ) throws DeferredRaiseException {
555
- preprocess (bytes , bytes .getEncoding (), new Encoding []{ null }, RegexpSupport .ErrorMode .RAISE );
573
+ public static void preprocessCheck (RopeWithEncoding ropeWithEncoding ) throws DeferredRaiseException {
574
+ preprocess (
575
+ ropeWithEncoding .getRope (),
576
+ ropeWithEncoding .getEncoding (),
577
+ new RubyEncoding []{ null },
578
+ RegexpSupport .ErrorMode .RAISE );
556
579
}
557
580
558
- public static RopeBuilder preprocess (Rope str , Encoding enc , Encoding [] fixedEnc ,
581
+ public static RopeBuilder preprocess (Rope str , RubyEncoding enc , RubyEncoding [] fixedEnc ,
559
582
RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
560
583
RopeBuilder to = RopeBuilder .createRopeBuilder (str .byteLength ());
561
584
562
- if (enc .isAsciiCompatible ()) {
585
+ if (enc .jcoding . isAsciiCompatible ()) {
563
586
fixedEnc [0 ] = null ;
564
587
} else {
565
588
fixedEnc [0 ] = enc ;
566
- to .setEncoding (enc );
589
+ to .setEncoding (enc . jcoding );
567
590
}
568
591
569
592
boolean hasProperty = unescapeNonAscii (to , str , enc , fixedEnc , mode );
570
593
if (hasProperty && fixedEnc [0 ] == null ) {
571
594
fixedEnc [0 ] = enc ;
572
595
}
573
596
if (fixedEnc [0 ] != null ) {
574
- to .setEncoding (fixedEnc [0 ]);
597
+ to .setEncoding (fixedEnc [0 ]. jcoding );
575
598
}
576
599
return to ;
577
600
}
578
601
579
- private static void preprocessLight (RubyContext context , Rope str , Encoding enc , Encoding [] fixedEnc ,
602
+ private static void preprocessLight (RubyContext context , RopeWithEncoding str , RubyEncoding enc ,
603
+ RubyEncoding [] fixedEnc ,
580
604
RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
581
- if (enc .isAsciiCompatible ()) {
605
+ if (enc .jcoding . isAsciiCompatible ()) {
582
606
fixedEnc [0 ] = null ;
583
607
} else {
584
608
fixedEnc [0 ] = enc ;
585
609
}
586
610
587
- boolean hasProperty = unescapeNonAscii (null , str , enc , fixedEnc , mode );
611
+ boolean hasProperty = unescapeNonAscii (null , str . getRope () , enc , fixedEnc , mode );
588
612
if (hasProperty && fixedEnc [0 ] == null ) {
589
613
fixedEnc [0 ] = enc ;
590
614
}
591
615
}
592
616
593
- public static RopeBuilder preprocessDRegexp (RubyContext context , Rope [] strings , RegexpOptions options )
617
+ public static RopeWithEncoding preprocessDRegexp (RubyContext context , RopeWithEncoding [] strings ,
618
+ RegexpOptions options )
594
619
throws DeferredRaiseException {
595
620
assert strings .length > 0 ;
596
621
597
- RopeBuilder string = RopeOperations .toRopeBuilderCopy (strings [0 ]);
622
+ RopeBuilder string = RopeOperations .toRopeBuilderCopy (strings [0 ]. getRope () );
598
623
599
- Encoding regexpEnc = processDRegexpElement (context , options , null , strings [0 ]);
624
+ RubyEncoding regexpEnc = processDRegexpElement (context , options , null , strings [0 ]);
600
625
601
626
for (int i = 1 ; i < strings .length ; i ++) {
602
- Rope str = strings [i ];
627
+ RopeWithEncoding str = strings [i ];
603
628
regexpEnc = processDRegexpElement (context , options , regexpEnc , str );
604
- string .append (str );
629
+ string .append (str . getRope () );
605
630
}
606
631
607
632
if (regexpEnc != null ) {
608
- string .setEncoding (regexpEnc );
633
+ string .setEncoding (regexpEnc .jcoding );
634
+ } else {
635
+ regexpEnc = strings [0 ].getEncoding ();
609
636
}
610
-
611
- return string ;
637
+ return new RopeWithEncoding (RopeOperations .ropeFromRopeBuilder (string ), regexpEnc );
612
638
}
613
639
614
640
@ TruffleBoundary
615
- private static Encoding processDRegexpElement (RubyContext context , RegexpOptions options , Encoding regexpEnc ,
616
- Rope str ) throws DeferredRaiseException {
617
- Encoding strEnc = str .getEncoding ();
641
+ private static RubyEncoding processDRegexpElement (RubyContext context , RegexpOptions options ,
642
+ RubyEncoding regexpEnc ,
643
+ RopeWithEncoding str ) throws DeferredRaiseException {
644
+ RubyEncoding strEnc = str .getEncoding ();
618
645
619
- if (options .isEncodingNone () && strEnc != ASCIIEncoding . INSTANCE ) {
620
- if (str .getCodeRange () != CR_7BIT ) {
646
+ if (options .isEncodingNone () && strEnc != Encodings . BINARY ) {
647
+ if (str .getRope (). getCodeRange () != CR_7BIT ) {
621
648
throw new RaiseException (
622
649
context ,
623
650
context .getCoreExceptions ().regexpError (
624
651
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script" ,
625
652
null ));
626
653
}
627
- strEnc = ASCIIEncoding . INSTANCE ;
654
+ strEnc = Encodings . BINARY ;
628
655
}
629
656
630
657
// This used to call preprocess, but the resulting rope builder was not
631
658
// used. Since the preprocessing error-checking can be done without
632
659
// creating a new rope builder, I added a "light" path.
633
- final Encoding [] fixedEnc = new Encoding []{ null };
660
+ final RubyEncoding [] fixedEnc = new RubyEncoding []{ null };
634
661
ClassicRegexp .preprocessLight (context , str , strEnc , fixedEnc , RegexpSupport .ErrorMode .PREPROCESS );
635
662
636
663
if (fixedEnc [0 ] != null ) {
@@ -788,21 +815,21 @@ public static Pair<Rope, RubyEncoding> quote19(Rope bs, RubyEncoding encoding) {
788
815
}
789
816
790
817
/** WARNING: This mutates options, so the caller should make sure it's a copy */
791
- static Encoding computeRegexpEncoding (RegexpOptions options , Encoding enc , Encoding [] fixedEnc )
818
+ static RubyEncoding computeRegexpEncoding (RegexpOptions options , RubyEncoding enc , RubyEncoding [] fixedEnc )
792
819
throws DeferredRaiseException {
793
820
if (fixedEnc [0 ] != null ) {
794
821
if ((fixedEnc [0 ] != enc && options .isFixed ()) ||
795
- (fixedEnc [0 ] != ASCIIEncoding . INSTANCE && options .isEncodingNone ())) {
822
+ (fixedEnc [0 ] != Encodings . BINARY && options .isEncodingNone ())) {
796
823
throw new DeferredRaiseException (context -> context
797
824
.getCoreExceptions ()
798
825
.regexpError ("incompatible character encoding" , null ));
799
826
}
800
- if (fixedEnc [0 ] != ASCIIEncoding . INSTANCE ) {
827
+ if (fixedEnc [0 ] != Encodings . BINARY ) {
801
828
options .setFixed (true );
802
829
enc = fixedEnc [0 ];
803
830
}
804
831
} else if (!options .isFixed ()) {
805
- enc = USASCIIEncoding . INSTANCE ;
832
+ enc = Encodings . US_ASCII ;
806
833
}
807
834
808
835
if (fixedEnc [0 ] != null ) {
0 commit comments