Skip to content

Commit e1dc587

Browse files
committed
Move get_actual_encoding to a node.
Also simplify the call signature since all our ropes start at byte 0 and end at their byte length.
1 parent b510bc6 commit e1dc587

File tree

4 files changed

+71
-53
lines changed

4 files changed

+71
-53
lines changed

src/main/java/org/truffleruby/core/encoding/EncodingNodes.java

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014, 2018 Oracle and/or its affiliates. All rights reserved. This
2+
* Copyright (c) 2014, 2019 Oracle and/or its affiliates. All rights reserved. This
33
* code is released under a tri EPL/GPL/LGPL license. You can use it,
44
* redistribute it and/or modify it under the terms of the:
55
*
@@ -20,7 +20,13 @@
2020
import com.oracle.truffle.api.profiles.ConditionProfile;
2121
import org.jcodings.Encoding;
2222
import org.jcodings.EncodingDB;
23+
import org.jcodings.specific.ASCIIEncoding;
2324
import org.jcodings.specific.USASCIIEncoding;
25+
import org.jcodings.specific.UTF16BEEncoding;
26+
import org.jcodings.specific.UTF16LEEncoding;
27+
import org.jcodings.specific.UTF32BEEncoding;
28+
import org.jcodings.specific.UTF32LEEncoding;
29+
import org.jcodings.unicode.UnicodeEncoding;
2430
import org.jcodings.util.CaseInsensitiveBytesHash;
2531
import org.jcodings.util.Hash;
2632
import org.truffleruby.Layouts;
@@ -468,6 +474,61 @@ public boolean isUnicode(DynamicObject encoding) {
468474

469475
}
470476

477+
// Port of MRI's `get_actual_encoding`.
//
// Resolves the encoding that should actually be used for a rope:
//   * if the rope's encoding is not a dummy encoding, it is returned as-is;
//   * if it is the dummy UTF-16 or UTF-32 encoding, the leading bytes are inspected for a
//     byte order mark (BOM) and the matching big-/little-endian encoding is returned;
//   * if the rope is long enough to hold a BOM but none matches, ASCIIEncoding.INSTANCE is returned;
//   * in every other case (rope too short, or some other dummy encoding) the rope's own
//     encoding is returned unchanged.
478+
public abstract static class GetActualEncodingNode extends RubyBaseNode {
479+
480+
// The dummy (endianness-less) encodings, looked up by name in the jcodings EncodingDB.
// They are compared by identity (==) below.
protected static final Encoding UTF16Dummy = EncodingDB.getEncodings().get("UTF-16".getBytes()).getEncoding();
481+
protected static final Encoding UTF32Dummy = EncodingDB.getEncodings().get("UTF-32".getBytes()).getEncoding();
482+
483+
// Factory returning the DSL-generated implementation of this node.
public static GetActualEncodingNode create() {
484+
return EncodingNodesFactory.GetActualEncodingNodeGen.create();
485+
}
486+
487+
// Entry point: returns the actual encoding for the whole rope. There are no offset/length
// parameters; the dummy path always reads from byte index 0 and uses rope.byteLength().
public abstract Encoding execute(Rope rope);
488+
489+
// Non-dummy encodings need no inspection of the bytes.
@Specialization(guards = "!rope.getEncoding().isDummy()")
490+
public Encoding getActualEncoding(Rope rope) {
491+
return rope.getEncoding();
492+
}
493+
494+
// Dummy encodings: only UTF-16 and UTF-32 are refined, by BOM sniffing.
@TruffleBoundary
495+
@Specialization(guards = "rope.getEncoding().isDummy()")
496+
public Encoding getActualEncodingDummy(Rope rope) {
497+
final Encoding encoding = rope.getEncoding();
498+
499+
if (encoding instanceof UnicodeEncoding) {
500+
// handle dummy UTF-16 and UTF-32 by scanning for BOM, as in MRI
501+
if (encoding == UTF16Dummy && rope.byteLength() >= 2) {
502+
// Mask with 0xff so each value is in 0..255 and can be compared against 0xFE / 0xFF
// (presumably rope.get returns a signed byte -- confirm against Rope).
int c0 = rope.get(0) & 0xff;
503+
int c1 = rope.get(1) & 0xff;
504+
505+
// FE FF => big-endian BOM, FF FE => little-endian BOM.
if (c0 == 0xFE && c1 == 0xFF) {
506+
return UTF16BEEncoding.INSTANCE;
507+
} else if (c0 == 0xFF && c1 == 0xFE) {
508+
return UTF16LEEncoding.INSTANCE;
509+
}
510+
// At least 2 bytes but no UTF-16 BOM.
return ASCIIEncoding.INSTANCE;
511+
} else if (encoding == UTF32Dummy && rope.byteLength() >= 4) {
512+
int c0 = rope.get(0) & 0xff;
513+
int c1 = rope.get(1) & 0xff;
514+
int c2 = rope.get(2) & 0xff;
515+
int c3 = rope.get(3) & 0xff;
516+
517+
// 00 00 FE FF => big-endian BOM, FF FE 00 00 => little-endian BOM.
if (c0 == 0 && c1 == 0 && c2 == 0xFE && c3 == 0xFF) {
518+
return UTF32BEEncoding.INSTANCE;
519+
} else if (c3 == 0 && c2 == 0 && c1 == 0xFE && c0 == 0xFF) {
520+
return UTF32LEEncoding.INSTANCE;
521+
}
522+
// At least 4 bytes but no UTF-32 BOM.
return ASCIIEncoding.INSTANCE;
523+
}
524+
}
525+
526+
// Rope too short to contain a BOM, or a dummy encoding other than UTF-16/UTF-32:
// keep the rope's declared encoding.
return encoding;
527+
}
528+
529+
530+
}
531+
471532
@Primitive(name = "encoding_get_default_encoding", needsSelf = false)
472533
public abstract static class GetDefaultEncodingNode extends PrimitiveArrayArgumentsNode {
473534

src/main/java/org/truffleruby/core/rope/RopeOperations.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016, 2018 Oracle and/or its affiliates. All rights reserved. This
2+
* Copyright (c) 2016, 2019 Oracle and/or its affiliates. All rights reserved. This
33
* code is released under a tri EPL/GPL/LGPL license. You can use it,
44
* redistribute it and/or modify it under the terms of the:
55
*
@@ -31,7 +31,6 @@
3131
import org.truffleruby.core.Hashing;
3232
import org.truffleruby.core.encoding.EncodingManager;
3333
import org.truffleruby.core.rope.RopeNodes.WithEncodingNode;
34-
import org.truffleruby.core.string.EncodingUtils;
3534
import org.truffleruby.core.string.StringAttributes;
3635
import org.truffleruby.core.string.StringOperations;
3736
import org.truffleruby.core.string.StringSupport;
@@ -234,12 +233,6 @@ private static String decode(Charset charset, byte[] bytes, int byteOffset, int
234233
return new String(bytes, byteOffset, byteLength, charset);
235234
}
236235

237-
// MRI: get_actual_encoding
238-
@TruffleBoundary
239-
public static Encoding STR_ENC_GET(Rope rope) {
240-
return EncodingUtils.getActualEncoding(rope.getEncoding(), rope.getBytes(), 0, rope.byteLength());
241-
}
242-
243236
@TruffleBoundary
244237
public static StringAttributes calculateCodeRangeAndLength(Encoding encoding, byte[] bytes, int start, int end) {
245238
if (bytes.length == 0) {

src/main/java/org/truffleruby/core/string/EncodingUtils.java

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,8 @@
2929
import java.util.List;
3030

3131
import org.jcodings.Encoding;
32-
import org.jcodings.EncodingDB;
3332
import org.jcodings.ascii.AsciiTables;
3433
import org.jcodings.specific.ASCIIEncoding;
35-
import org.jcodings.specific.UTF16BEEncoding;
36-
import org.jcodings.specific.UTF16LEEncoding;
37-
import org.jcodings.specific.UTF32BEEncoding;
38-
import org.jcodings.specific.UTF32LEEncoding;
39-
import org.jcodings.unicode.UnicodeEncoding;
4034
import org.truffleruby.core.rope.CodeRange;
4135

4236
public class EncodingUtils {
@@ -134,39 +128,6 @@ public static List<String> encodingNames(byte[] name, int p, int end) {
134128
}
135129

136130

137-
private static final Encoding UTF16Dummy = EncodingDB.getEncodings().get("UTF-16".getBytes()).getEncoding();
138-
private static final Encoding UTF32Dummy = EncodingDB.getEncodings().get("UTF-32".getBytes()).getEncoding();
139-
140-
public static Encoding getActualEncoding(Encoding enc, byte[] bytes, int p, int end) {
141-
if (enc.isDummy() && enc instanceof UnicodeEncoding) {
142-
// handle dummy UTF-16 and UTF-32 by scanning for BOM, as in MRI
143-
if (enc == UTF16Dummy && end - p >= 2) {
144-
int c0 = bytes[p] & 0xff;
145-
int c1 = bytes[p + 1] & 0xff;
146-
147-
if (c0 == 0xFE && c1 == 0xFF) {
148-
return UTF16BEEncoding.INSTANCE;
149-
} else if (c0 == 0xFF && c1 == 0xFE) {
150-
return UTF16LEEncoding.INSTANCE;
151-
}
152-
return ASCIIEncoding.INSTANCE;
153-
} else if (enc == UTF32Dummy && end - p >= 4) {
154-
int c0 = bytes[p] & 0xff;
155-
int c1 = bytes[p + 1] & 0xff;
156-
int c2 = bytes[p + 2] & 0xff;
157-
int c3 = bytes[p + 3] & 0xff;
158-
159-
if (c0 == 0 && c1 == 0 && c2 == 0xFE && c3 == 0xFF) {
160-
return UTF32BEEncoding.INSTANCE;
161-
} else if (c3 == 0 && c2 == 0 && c1 == 0xFE && c0 == 0xFF) {
162-
return UTF32LEEncoding.INSTANCE;
163-
}
164-
return ASCIIEncoding.INSTANCE;
165-
}
166-
}
167-
return enc;
168-
}
169-
170131
// rb_enc_ascget
171132
public static int encAscget(byte[] pBytes, int p, int e, int[] len, Encoding enc, CodeRange codeRange) {
172133
int c;

src/main/java/org/truffleruby/core/string/StringNodes.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013, 2018 Oracle and/or its affiliates. All rights reserved. This
2+
* Copyright (c) 2013, 2019 Oracle and/or its affiliates. All rights reserved. This
33
* code is released under a tri EPL/GPL/LGPL license. You can use it,
44
* redistribute it and/or modify it under the terms of the:
55
*
@@ -103,6 +103,7 @@
103103
import org.truffleruby.core.cast.ToIntNodeGen;
104104
import org.truffleruby.core.cast.ToStrNode;
105105
import org.truffleruby.core.cast.ToStrNodeGen;
106+
import org.truffleruby.core.encoding.EncodingNodes;
106107
import org.truffleruby.core.encoding.EncodingNodes.CheckEncodingNode;
107108
import org.truffleruby.core.encoding.EncodingNodes.CheckRopeEncodingNode;
108109
import org.truffleruby.core.encoding.EncodingNodes.NegotiateCompatibleEncodingNode;
@@ -1396,11 +1397,12 @@ public Object lstripBangSingleByte(DynamicObject string,
13961397
@TruffleBoundary
13971398
@Specialization(guards = { "!isEmpty(string)", "!isSingleByteOptimizable(string, singleByteOptimizableNode)" })
13981399
public Object lstripBang(DynamicObject string,
1399-
@Cached("create()") RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode) {
1400+
@Cached("create()") RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode,
1401+
@Cached("create()") EncodingNodes.GetActualEncodingNode getActualEncodingNode) {
14001402
// Taken from org.jruby.RubyString#lstrip_bang19 and org.jruby.RubyString#multiByteLStrip.
14011403

14021404
final Rope rope = rope(string);
1403-
final Encoding enc = RopeOperations.STR_ENC_GET(rope);
1405+
final Encoding enc = getActualEncodingNode.execute(rope);
14041406
final int s = 0;
14051407
final int end = s + rope.byteLength();
14061408

@@ -1512,11 +1514,12 @@ public Object rstripBangSingleByte(DynamicObject string,
15121514
@TruffleBoundary
15131515
@Specialization(guards = { "!isEmpty(string)", "!isSingleByteOptimizable(string, singleByteOptimizableNode)" })
15141516
public Object rstripBang(DynamicObject string,
1515-
@Cached("createBinaryProfile()") ConditionProfile dummyEncodingProfile) {
1517+
@Cached("create()") EncodingNodes.GetActualEncodingNode getActualEncodingNode,
1518+
@Cached("createBinaryProfile()") ConditionProfile dummyEncodingProfile) {
15161519
// Taken from org.jruby.RubyString#rstrip_bang19 and org.jruby.RubyString#multiByteRStrip19.
15171520

15181521
final Rope rope = rope(string);
1519-
final Encoding enc = RopeOperations.STR_ENC_GET(rope);
1522+
final Encoding enc = getActualEncodingNode.execute(rope);
15201523

15211524
if (dummyEncodingProfile.profile(enc.isDummy())) {
15221525
throw new RaiseException(getContext(), coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(enc, this));

0 commit comments

Comments
 (0)