Skip to content

Commit e136c59

Browse files
committed
String#end_with? only needs to know if the byte index is the head of a character
* Not the actual position of the nearest character head.
* Removes the loop for the UTF-8 case.
1 parent 7969535 commit e136c59

File tree

3 files changed

+56
-70
lines changed

3 files changed

+56
-70
lines changed

src/main/java/org/truffleruby/core/encoding/EncodingLeftCharHeadNode.java

Lines changed: 0 additions & 63 deletions
This file was deleted.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright (c) 2020 Oracle and/or its affiliates. All rights reserved. This
3+
* code is released under a tri EPL/GPL/LGPL license. You can use it,
4+
* redistribute it and/or modify it under the terms of the:
5+
*
6+
* Eclipse Public License version 2.0, or
7+
* GNU General Public License version 2, or
8+
* GNU Lesser General Public License version 2.1.
9+
*/
10+
package org.truffleruby.core.encoding;
11+
12+
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
13+
import com.oracle.truffle.api.dsl.Specialization;
14+
import org.truffleruby.language.RubyBaseNode;
15+
16+
/** Whether the position at byteOffset is the start of a character and not in the middle of a character.
 * <p>
 * The answer is computed by one of three specializations selected on the encoding: single-byte encodings, UTF-8, and
 * every other (multi-byte) encoding. Only the last one uses the {@code end} argument. */
17+
public abstract class IsCharacterHeadNode extends RubyBaseNode {
18+
19+
/** Creates a new node instance through the DSL-generated {@code IsCharacterHeadNodeGen} factory. */
public static IsCharacterHeadNode create() {
20+
return IsCharacterHeadNodeGen.create();
21+
}
22+
23+
/** Reports whether {@code byteOffset} is the first byte of a character in {@code bytes}.
 *
 * @param enc encoding whose {@code jcoding} decides which specialization runs
 * @param bytes the bytes to inspect
 * @param byteOffset index into {@code bytes} of the position being tested
 * @param end forwarded as the end bound to jcodings' {@code leftAdjustCharHead} in the generic case; ignored by the
 *            single-byte and UTF-8 cases
 * @return {@code true} if {@code byteOffset} is a character head, {@code false} if it is inside a character */
public abstract boolean execute(RubyEncoding enc, byte[] bytes, int byteOffset, int end);
24+
25+
@Specialization(guards = "enc.jcoding.isSingleByte()")
26+
protected boolean singleByte(RubyEncoding enc, byte[] bytes, int byteOffset, int end) {
27+
// org.jcodings.SingleByteEncoding#leftAdjustCharHead returns the offset unchanged, so every offset is a character head
28+
return true;
29+
}
30+
31+
@Specialization(guards = { "!enc.jcoding.isSingleByte()", "enc.jcoding.isUTF8()" })
32+
protected boolean utf8(RubyEncoding enc, byte[] bytes, int byteOffset, int end) {
33+
// Based on org.jcodings.specific.BaseUTF8Encoding#leftAdjustCharHead, but only the byte at byteOffset is examined:
// no backwards scan is needed to answer "is this a head?". There is no bounds check here, so this assumes the
// caller passes a byteOffset that is a valid index into bytes -- NOTE(review): confirm at call sites.
34+
return utf8IsLead(bytes[byteOffset] & 0xff);
35+
36+
}
37+
38+
@TruffleBoundary
39+
@Specialization(guards = { "!enc.jcoding.isSingleByte()", "!enc.jcoding.isUTF8()" })
40+
protected boolean other(RubyEncoding enc, byte[] bytes, int byteOffset, int end) {
41+
// Generic case: ask jcodings to left-adjust from byteOffset (scanning from index 0); the offset is a character
// head exactly when the adjustment leaves it where it was.
return enc.jcoding.leftAdjustCharHead(bytes, 0, byteOffset, end) == byteOffset;
42+
}
43+
44+
/** Copied from org.jcodings.specific.BaseUTF8Encoding. Returns {@code true} unless the top two bits of {@code c} are
 * {@code 10}, i.e. unless {@code c} has the form {@code 10xxxxxx}. */
45+
private static boolean utf8IsLead(int c) {
46+
// The trailing "& 0xff" cannot change the result of "c & 0xc0"; it is kept as in the copied original.
return ((c & 0xc0) & 0xff) != 0x80;
47+
}
48+
49+
}

src/main/java/org/truffleruby/core/string/StringNodes.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
import org.truffleruby.core.cast.ToRopeNodeGen;
106106
import org.truffleruby.core.cast.ToStrNode;
107107
import org.truffleruby.core.cast.ToStrNodeGen;
108-
import org.truffleruby.core.encoding.EncodingLeftCharHeadNode;
108+
import org.truffleruby.core.encoding.IsCharacterHeadNode;
109109
import org.truffleruby.core.encoding.EncodingNodes.CheckEncodingNode;
110110
import org.truffleruby.core.encoding.EncodingNodes.CheckRopeEncodingNode;
111111
import org.truffleruby.core.encoding.EncodingNodes.GetActualEncodingNode;
@@ -980,7 +980,7 @@ protected boolean bothSingleByteOptimizable(Rope stringRope, Rope otherRope) {
980980
@Primitive(name = "string_end_with?")
981981
public abstract static class EndWithNode extends CoreMethodArrayArgumentsNode {
982982

983-
@Child EncodingLeftCharHeadNode encodingLeftCharHeadNode;
983+
@Child IsCharacterHeadNode isCharacterHeadNode;
984984

985985
@Specialization
986986
protected boolean endWithBytes(Object string, Object suffix, RubyEncoding enc,
@@ -1007,7 +1007,7 @@ protected boolean endWithBytes(Object string, Object suffix, RubyEncoding enc,
10071007

10081008
final int offset = stringByteLength - suffixByteLength;
10091009

1010-
if (leftAdjustProfile.profile(leftAdjustCharHead(enc, stringByteLength, stringBytes, offset))) {
1010+
if (leftAdjustProfile.profile(!isCharacterHead(enc, stringByteLength, stringBytes, offset))) {
10111011
return false;
10121012
}
10131013

@@ -1024,12 +1024,12 @@ protected boolean endWithBytes(Object string, Object suffix, RubyEncoding enc,
10241024
return true;
10251025
}
10261026

1027-
private boolean leftAdjustCharHead(RubyEncoding enc, int stringByteLength, byte[] stringBytes, int offset) {
1028-
if (encodingLeftCharHeadNode == null) {
1027+
private boolean isCharacterHead(RubyEncoding enc, int stringByteLength, byte[] stringBytes, int offset) {
1028+
if (isCharacterHeadNode == null) {
10291029
CompilerDirectives.transferToInterpreterAndInvalidate();
1030-
encodingLeftCharHeadNode = insert(EncodingLeftCharHeadNode.create());
1030+
isCharacterHeadNode = insert(IsCharacterHeadNode.create());
10311031
}
1032-
return encodingLeftCharHeadNode.execute(enc, stringBytes, 0, offset, stringByteLength) != offset;
1032+
return isCharacterHeadNode.execute(enc, stringBytes, offset, stringByteLength);
10331033
}
10341034

10351035
}

0 commit comments

Comments
 (0)