36
36
***** END LICENSE BLOCK *****/
37
37
package org .truffleruby .core .regexp ;
38
38
39
- import static org .truffleruby .core .string .StringUtils .EMPTY_STRING_ARRAY ;
40
-
41
39
import java .nio .charset .StandardCharsets ;
42
40
import java .util .Arrays ;
43
- import java .util .Iterator ;
44
41
45
42
import com .oracle .truffle .api .strings .AbstractTruffleString ;
46
43
import com .oracle .truffle .api .strings .TruffleStringBuilder ;
49
46
import org .jcodings .specific .SJISEncoding ;
50
47
import org .jcodings .specific .USASCIIEncoding ;
51
48
import org .jcodings .specific .UTF8Encoding ;
52
- import org .joni .NameEntry ;
53
49
import org .joni .Option ;
54
50
import org .joni .Regex ;
55
51
import org .joni .Syntax ;
59
55
import org .truffleruby .collections .ByteArrayBuilder ;
60
56
import org .truffleruby .core .encoding .Encodings ;
61
57
import org .truffleruby .core .encoding .RubyEncoding ;
62
- import org .truffleruby .core .encoding .TStringUtils ;
63
58
import org .truffleruby .core .string .ATStringWithEncoding ;
64
59
import org .truffleruby .core .string .TStringBuilder ;
65
60
import org .truffleruby .core .string .TStringWithEncoding ;
68
63
import org .truffleruby .language .backtrace .BacktraceFormatter ;
69
64
import org .truffleruby .language .control .DeferredRaiseException ;
70
65
import org .truffleruby .language .control .RaiseException ;
71
- import org .truffleruby .parser .ReOptions ;
72
66
73
67
import com .oracle .truffle .api .CompilerDirectives .TruffleBoundary ;
74
68
import com .oracle .truffle .api .nodes .Node ;
75
69
import org .truffleruby .parser .RubyDeferredWarnings ;
76
70
77
- public final class ClassicRegexp implements ReOptions {
78
-
79
- private final Regex pattern ;
80
- private final TStringWithEncoding str ;
81
- private RegexpOptions options ;
82
-
83
- public void setLiteral () {
84
- options = options .setLiteral (true );
85
- }
86
-
87
- public Encoding getEncoding () {
88
- return pattern .getEncoding ();
89
- }
71
+ public final class ClassicRegexp {
90
72
91
73
public static Regex makeRegexp (RubyDeferredWarnings rubyDeferredWarnings ,
92
74
TStringBuilder processedSource , RegexpOptions options ,
@@ -112,34 +94,6 @@ public static String getRegexErrorMessage(AbstractTruffleString source, Exceptio
112
94
return BacktraceFormatter .formatJavaThrowableMessage (e ) + ": /" + source + "/" + options .toOptionsString ();
113
95
}
114
96
115
- private static Regex getRegexpFromCache (TStringBuilder bytes , RubyEncoding encoding , RegexpOptions options ,
116
- AbstractTruffleString source ) throws DeferredRaiseException {
117
- final Regex newRegex = makeRegexp (null , bytes , options , encoding , source , null );
118
- newRegex .setUserObject (bytes );
119
- return newRegex ;
120
- }
121
-
122
- public ClassicRegexp (TStringWithEncoding strEnc , RegexpOptions originalOptions )
123
- throws DeferredRaiseException {
124
- this .options = originalOptions ;
125
-
126
- if (strEnc .encoding .isDummy ) {
127
- throw new UnsupportedOperationException ("can't make regexp with dummy encoding" );
128
- }
129
-
130
- RegexpOptions [] optionsArray = new RegexpOptions []{ originalOptions };
131
- RubyEncoding [] fixedEnc = new RubyEncoding []{ null };
132
- TStringBuilder unescaped = preprocess (strEnc , strEnc .encoding , fixedEnc , RegexpSupport .ErrorMode .RAISE );
133
- final RubyEncoding computedEnc = computeRegexpEncoding (optionsArray , strEnc .encoding , fixedEnc );
134
- this .pattern = getRegexpFromCache (
135
- unescaped ,
136
- computedEnc ,
137
- options ,
138
- strEnc .forceEncoding (computedEnc ).tstring );
139
- this .options = optionsArray [0 ];
140
- this .str = strEnc ;
141
- }
142
-
143
97
@ TruffleBoundary
144
98
@ SuppressWarnings ("fallthrough" )
145
99
private static boolean unescapeNonAscii (TStringBuilder to , TStringWithEncoding str , RubyEncoding enc ,
@@ -849,34 +803,33 @@ public static void appendOptions(TStringBuilder to, RegexpOptions options) {
849
803
}
850
804
851
805
@ SuppressWarnings ("unused" )
852
- public ByteArrayBuilder toByteArrayBuilder ( ) {
806
+ public static TStringWithEncoding toS ( TStringWithEncoding source , RegexpOptions options ) {
853
807
RegexpOptions newOptions = (RegexpOptions ) options .clone ();
854
- var byteArray = str .getInternalByteArray ();
808
+ var byteArray = source .getInternalByteArray ();
855
809
int p = 0 ;
856
810
int len = byteArray .getLength ();
857
811
858
812
TStringBuilder result = TStringBuilder .create (len );
859
813
result .append ((byte ) '(' );
860
814
result .append ((byte ) '?' );
861
815
862
- again : do {
816
+ do {
863
817
if (len >= 4 && byteArray .get (p ) == '(' && byteArray .get (p + 1 ) == '?' ) {
864
- boolean err = true ;
865
818
p += 2 ;
866
- if (( len -= 2 ) > 0 ) {
867
- do {
868
- if (byteArray .get (p ) == 'm' ) {
869
- newOptions = newOptions .setMultiline (true );
870
- } else if (byteArray .get (p ) == 'i' ) {
871
- newOptions = newOptions .setIgnorecase (true );
872
- } else if (byteArray .get (p ) == 'x' ) {
873
- newOptions = newOptions .setExtended (true );
874
- } else {
875
- break ;
876
- }
877
- p ++;
878
- } while (--len > 0 );
879
- }
819
+ len -= 2 ;
820
+ do {
821
+ if (byteArray .get (p ) == 'm' ) {
822
+ newOptions = newOptions .setMultiline (true );
823
+ } else if (byteArray .get (p ) == 'i' ) {
824
+ newOptions = newOptions .setIgnorecase (true );
825
+ } else if (byteArray .get (p ) == 'x' ) {
826
+ newOptions = newOptions .setExtended (true );
827
+ } else {
828
+ break ;
829
+ }
830
+ p ++;
831
+ } while (--len > 0 );
832
+
880
833
if (len > 1 && byteArray .get (p ) == '-' ) {
881
834
++p ;
882
835
--len ;
@@ -897,9 +850,10 @@ public ByteArrayBuilder toByteArrayBuilder() {
897
850
if (byteArray .get (p ) == ')' ) {
898
851
--len ;
899
852
++p ;
900
- continue again ;
853
+ continue ;
901
854
}
902
855
856
+ boolean err = true ;
903
857
if (byteArray .get (p ) == ':' && byteArray .get (p + len - 1 ) == ')' ) {
904
858
p ++;
905
859
try {
@@ -908,7 +862,7 @@ public ByteArrayBuilder toByteArrayBuilder() {
908
862
p + byteArray .getOffset (),
909
863
p + byteArray .getOffset () + (len -= 2 ),
910
864
Option .DEFAULT ,
911
- str .encoding .jcoding ,
865
+ source .encoding .jcoding ,
912
866
Syntax .DEFAULT ,
913
867
new RegexWarnCallback ());
914
868
err = false ;
@@ -920,7 +874,7 @@ public ByteArrayBuilder toByteArrayBuilder() {
920
874
if (err ) {
921
875
newOptions = options ;
922
876
p = 0 ;
923
- len = str .byteLength ();
877
+ len = source .byteLength ();
924
878
}
925
879
}
926
880
@@ -939,17 +893,16 @@ public ByteArrayBuilder toByteArrayBuilder() {
939
893
}
940
894
}
941
895
result .append ((byte ) ':' );
942
- appendRegexpString (result , str , p , len );
896
+ appendRegexpString (result , source , p , len );
943
897
944
898
result .append ((byte ) ')' );
945
- result .setEncoding (Encodings .getBuiltInEncoding (getEncoding ()));
946
- return result ;
947
- //return RubyString.newString(getRuntime(), result, getEncoding()).infectBy(this);
899
+ result .setEncoding (source .encoding );
900
+ return result .toTStringWithEnc ();
948
901
} while (true );
949
902
}
950
903
951
904
@ TruffleBoundary
952
- public void appendRegexpString (TStringBuilder to , TStringWithEncoding fullStr , int start , int len ) {
905
+ public static void appendRegexpString (TStringBuilder to , TStringWithEncoding fullStr , int start , int len ) {
953
906
var str = fullStr .substring (start , len );
954
907
955
908
final var enc = str .encoding .jcoding ;
@@ -995,45 +948,11 @@ public void appendRegexpString(TStringBuilder to, TStringWithEncoding fullStr, i
995
948
}
996
949
}
997
950
998
- public String [] getNames () {
999
- int nameLength = pattern .numberOfNames ();
1000
- if (nameLength == 0 ) {
1001
- return EMPTY_STRING_ARRAY ;
1002
- }
1003
-
1004
- RubyEncoding encoding = Encodings .getBuiltInEncoding (pattern .getEncoding ());
1005
- String [] names = new String [nameLength ];
1006
- int j = 0 ;
1007
- for (Iterator <NameEntry > i = pattern .namedBackrefIterator (); i .hasNext ();) {
1008
- NameEntry e = i .next ();
1009
- // intern() to improve footprint
1010
- names [j ++] = TStringUtils .bytesToJavaStringOrThrow (e .name , e .nameP , e .nameEnd - e .nameP , encoding ).intern ();
1011
- }
1012
-
1013
- return names ;
1014
- }
1015
-
1016
951
// Code that used to be in ParserSupport but copied here as ParserSupport is coupled with the JRuby lexer & parser.
1017
952
// Needed until https://github.com/ruby/prism/issues/1997 is fixed.
1018
953
1019
- // From ParserSupport#newRegexpNode
1020
- public static TStringWithEncoding findEncodingForRegexpLiteral (TStringWithEncoding regexp , RegexpOptions options ,
1021
- RubyEncoding lexerEncoding , Node currentNode ) throws DeferredRaiseException {
1022
- TStringWithEncoding meat = regexpFragmentCheck (regexp , options , lexerEncoding , currentNode );
1023
- checkRegexpSyntax (meat , options .withoutOnce ());
1024
- return meat ;
1025
- }
1026
-
1027
- // MRI: reg_fragment_check
1028
- public static TStringWithEncoding regexpFragmentCheck (TStringWithEncoding value , RegexpOptions options ,
1029
- RubyEncoding lexerEncoding , Node currentNode ) throws DeferredRaiseException {
1030
- final TStringWithEncoding strEnc = setRegexpEncoding (value , options , lexerEncoding , currentNode );
1031
- ClassicRegexp .preprocessCheck (strEnc );
1032
- return strEnc ;
1033
- }
1034
-
1035
954
// MRI: reg_fragment_setenc_gen
1036
- private static TStringWithEncoding setRegexpEncoding (TStringWithEncoding value , RegexpOptions options ,
955
+ public static TStringWithEncoding setRegexpEncoding (TStringWithEncoding value , RegexpOptions options ,
1037
956
RubyEncoding lexerEncoding , Node currentNode ) throws DeferredRaiseException {
1038
957
options = options .setup ();
1039
958
final RubyEncoding optionsEncoding = options .getEncoding () == null
@@ -1067,12 +986,6 @@ private static TStringWithEncoding setRegexpEncoding(TStringWithEncoding value,
1067
986
return value ;
1068
987
}
1069
988
1070
- private static ClassicRegexp checkRegexpSyntax (TStringWithEncoding value , RegexpOptions options )
1071
- throws DeferredRaiseException {
1072
- // This is only for syntax checking but this will as a side effect create an entry in the regexp cache.
1073
- return new ClassicRegexp (value , options );
1074
- }
1075
-
1076
989
private static char optionsEncodingChar (Encoding optionEncoding ) {
1077
990
if (optionEncoding == USASCIIEncoding .INSTANCE ) {
1078
991
return 'n' ;
0 commit comments