17
17
18
18
import com .ctc .wstx .util .XmlChars ;
19
19
20
+ import java .util .stream .IntStream ;
21
+
20
22
/**
21
23
* Base class used by readers (specifically, by
22
24
* {@link com.ctc.wstx.sr.StreamScanner}, and its sub-classes)
@@ -50,6 +52,23 @@ public class WstxInputData
50
52
*/
51
53
public final static int MAX_UNICODE_CHAR = 0x10FFFF ;
52
54
55
/**
 * Lookup table indexed by 7-bit ASCII code (0 - 127): an entry is
 * {@code true} when that character is accepted as the first character
 * of a name. Only {@code 'a'-'z'}, {@code 'A'-'Z'} and {@code '_'} are
 * marked; every other entry keeps the array default of {@code false}.
 */
private static final boolean[] asciiNameStartChars = new boolean[128];
static {
    // Plain loops instead of stream + lambda side effects: filling a
    // fixed table is a trivial indexed loop, and this keeps class
    // initialization free of stream/lambda machinery.
    for (int ch = 'a'; ch <= 'z'; ++ch) {
        asciiNameStartChars[ch] = true;
    }
    for (int ch = 'A'; ch <= 'Z'; ++ch) {
        asciiNameStartChars[ch] = true;
    }
    asciiNameStartChars['_'] = true;
}
61
+
62
/**
 * Lookup table indexed by 7-bit ASCII code (0 - 127): an entry is
 * {@code true} when that character is accepted inside a name (at any
 * position after the first). Marked entries are {@code 'a'-'z'},
 * {@code 'A'-'Z'}, {@code '0'-'9'}, {@code '.'}, {@code '-'} and
 * {@code '_'}; every other entry keeps the array default of
 * {@code false}.
 */
private static final boolean[] asciiNameChars = new boolean[128];
static {
    // Plain loops instead of stream + lambda side effects: filling a
    // fixed table is a trivial indexed loop, and this keeps class
    // initialization free of stream/lambda machinery.
    for (int ch = 'a'; ch <= 'z'; ++ch) {
        asciiNameChars[ch] = true;
    }
    for (int ch = 'A'; ch <= 'Z'; ++ch) {
        asciiNameChars[ch] = true;
    }
    for (int ch = '0'; ch <= '9'; ++ch) {
        asciiNameChars[ch] = true;
    }
    asciiNameChars['.'] = true;
    asciiNameChars['-'] = true;
    asciiNameChars['_'] = true;
}
71
+
53
72
/*
54
73
////////////////////////////////////////////////////
55
74
// Configuration
@@ -153,14 +172,9 @@ protected final boolean isNameStartChar(char c)
153
172
/* First, let's handle 7-bit ascii range (identical between xml
154
173
* 1.0 and 1.1)
155
174
*/
156
- if (c <= 0x7A ) { // 'z' or earlier
157
- if (c >= 0x61 ) { // 'a' - 'z' are ok
158
- return true ;
159
- }
160
- if (c < 0x41 ) { // before 'A' just white space
161
- return false ;
162
- }
163
- return (c <= 0x5A ) || (c == '_' ); // 'A' - 'Z' and '_' are ok
175
+ if (c < 128 ) {
176
+ // this is performance critical, so we use a lookup table instead of if-branches
177
+ return asciiNameStartChars [c ];
164
178
}
165
179
/* Ok, otherwise need to use a big honking bit sets... which
166
180
* differ between 1.0 and 1.1
@@ -178,18 +192,9 @@ protected final boolean isNameStartChar(char c)
178
192
protected final boolean isNameChar (char c )
179
193
{
180
194
// First, let's handle 7-bit ascii range
181
- if (c <= 0x7A ) { // 'z' or earlier
182
- if (c >= 0x61 ) { // 'a' - 'z' are ok
183
- return true ;
184
- }
185
- if (c <= 0x5A ) {
186
- if (c >= 0x41 ) { // 'A' - 'Z' ok too
187
- return true ;
188
- }
189
- // As are 0-9, '.' and '-'
190
- return (c >= 0x30 && c <= 0x39 ) || (c == '.' ) || (c == '-' );
191
- }
192
- return (c == 0x5F ); // '_' is ok too
195
+ if (c < 128 ) {
196
+ // this is performance critical, so we use a lookup table instead of if-branches
197
+ return asciiNameChars [c ];
193
198
}
194
199
return mXml11 ? XmlChars .is11NameChar (c ) : XmlChars .is10NameChar (c );
195
200
}
0 commit comments