Skip to content

Commit da607f1

Browse files
author
Nicolas Laurent
committed
[GR-21660] [GR-26446] Stop rebalancing trees of ConcatRopes on append
PullRequest: truffleruby/2038
2 parents 3ee5d06 + 8ea672c commit da607f1

File tree

9 files changed

+113
-198
lines changed

9 files changed

+113
-198
lines changed

src/main/java/org/truffleruby/core/rope/ConcatRope.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
import org.jcodings.Encoding;
1313
import org.jcodings.specific.ASCIIEncoding;
1414

15-
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
16-
1715
public class ConcatRope extends ManagedRope {
1816

1917
private final ManagedRope left;
@@ -79,16 +77,6 @@ Rope withBinaryEncoding() {
7977
balanced);
8078
}
8179

82-
@Override
83-
@TruffleBoundary
84-
public byte getByteSlow(int index) {
85-
if (index < left.byteLength()) {
86-
return left.getByteSlow(index);
87-
}
88-
89-
return right.getByteSlow(index - left.byteLength());
90-
}
91-
9280
public ManagedRope getLeft() {
9381
return left;
9482
}

src/main/java/org/truffleruby/core/rope/ManagedRope.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ public final int characterLength() {
3939
return characterLength;
4040
}
4141

42+
@Override
43+
protected byte getByteSlow(int index) {
44+
return RopeOperations.getByteSlow(this, index);
45+
}
46+
4247
@Override
4348
public final byte[] getBytes() {
4449
if (bytes == null) {

src/main/java/org/truffleruby/core/rope/RepeatingRope.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,6 @@ protected byte[] getBytesSlow() {
6868
return super.getBytesSlow();
6969
}
7070

71-
@Override
72-
protected byte getByteSlow(int index) {
73-
return child.getByteSlow(index % child.byteLength());
74-
}
75-
7671
public ManagedRope getChild() {
7772
return child;
7873
}

src/main/java/org/truffleruby/core/rope/RopeNodes.java

Lines changed: 1 addition & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@
1919
import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN;
2020
import static org.truffleruby.core.rope.CodeRange.CR_VALID;
2121

22-
import java.util.ArrayDeque;
2322
import java.util.Arrays;
24-
import java.util.Deque;
2523

2624
import org.jcodings.Encoding;
2725
import org.jcodings.specific.ASCIIEncoding;
@@ -425,8 +423,6 @@ public static ConcatNode create() {
425423
return RopeNodesFactory.ConcatNodeGen.create();
426424
}
427425

428-
@Child private FlattenNode flattenNode;
429-
430426
public abstract Rope executeConcat(Rope left, Rope right, Encoding encoding);
431427

432428
@Specialization
@@ -467,8 +463,7 @@ protected Rope concatRightEmpty(ManagedRope left, Rope right, Encoding encoding,
467463
@Specialization(guards = { "!left.isEmpty()", "!right.isEmpty()", "!isCodeRangeBroken(left, right)" })
468464
protected Rope concat(ManagedRope left, ManagedRope right, Encoding encoding,
469465
@Cached ConditionProfile sameCodeRangeProfile,
470-
@Cached ConditionProfile brokenCodeRangeProfile,
471-
@Cached ConditionProfile shouldRebalanceProfile) {
466+
@Cached ConditionProfile brokenCodeRangeProfile) {
472467
try {
473468
Math.addExact(left.byteLength(), right.byteLength());
474469
} catch (ArithmeticException e) {
@@ -477,18 +472,7 @@ protected Rope concat(ManagedRope left, ManagedRope right, Encoding encoding,
477472
getContext().getCoreExceptions().argumentErrorTooLargeString(this));
478473
}
479474

480-
if (shouldRebalanceProfile.profile(
481-
left.depth() >= getContext().getOptions().ROPE_DEPTH_THRESHOLD && left instanceof ConcatRope)) {
482-
left = rebalance((ConcatRope) left, getContext().getOptions().ROPE_DEPTH_THRESHOLD, getFlattenNode());
483-
}
484-
485-
if (shouldRebalanceProfile.profile(
486-
right.depth() >= getContext().getOptions().ROPE_DEPTH_THRESHOLD && right instanceof ConcatRope)) {
487-
right = rebalance((ConcatRope) right, getContext().getOptions().ROPE_DEPTH_THRESHOLD, getFlattenNode());
488-
}
489-
490475
int depth = depth(left, right);
491-
/* if (depth >= 10) { System.out.println("ConcatRope depth: " + depth); } */
492476

493477
return new ConcatRope(
494478
left,
@@ -503,14 +487,6 @@ protected Rope concat(ManagedRope left, ManagedRope right, Encoding encoding,
503487
isBalanced(left, right));
504488
}
505489

506-
private FlattenNode getFlattenNode() {
507-
if (flattenNode == null) {
508-
CompilerDirectives.transferToInterpreterAndInvalidate();
509-
flattenNode = insert(FlattenNode.create());
510-
}
511-
return flattenNode;
512-
}
513-
514490
private boolean isBalanced(Rope left, Rope right) {
515491
// Our definition of balanced is centered around the notion of rebalancing. We could have a simple structure
516492
// such as ConcatRope(ConcatRope(LeafRope, LeafRope), LeafRope) that is balanced on its own but may contribute
@@ -532,81 +508,6 @@ private boolean isBalanced(Rope left, Rope right) {
532508
}
533509
}
534510

535-
@TruffleBoundary
536-
private ManagedRope rebalance(ConcatRope rope, int depthThreshold, FlattenNode flattenNode) {
537-
Deque<ManagedRope> currentRopeQueue = new ArrayDeque<>();
538-
Deque<ManagedRope> nextLevelQueue = new ArrayDeque<>();
539-
540-
linearizeTree(rope.getLeft(), currentRopeQueue);
541-
linearizeTree(rope.getRight(), currentRopeQueue);
542-
543-
final int flattenThreshold = depthThreshold / 2;
544-
545-
ManagedRope root = null;
546-
while (!currentRopeQueue.isEmpty()) {
547-
ManagedRope left = currentRopeQueue.pop();
548-
549-
if (left.depth() >= flattenThreshold) {
550-
// TODO (eregon, 15 June 2020): should this cache the resulting bytes by using getBytes()?
551-
left = flattenNode.executeFlatten(left);
552-
}
553-
554-
if (currentRopeQueue.isEmpty()) {
555-
if (nextLevelQueue.isEmpty()) {
556-
root = left;
557-
} else {
558-
// If a rope can't be paired with another rope at the current level (i.e., odd numbers of ropes),
559-
// it needs to be promoted to the next level where it will be tried again. Since by definition
560-
// every rope already present in the next level must have occurred before this rope in the current
561-
// level, this rope must be added to the end of the list in the next level to maintain proper
562-
// position.
563-
nextLevelQueue.add(left);
564-
}
565-
} else {
566-
ManagedRope right = currentRopeQueue.pop();
567-
568-
if (right.depth() >= flattenThreshold) {
569-
// TODO (eregon, 15 June 2020): should this cache the resulting bytes by using getBytes()?
570-
right = flattenNode.executeFlatten(right);
571-
}
572-
573-
final ManagedRope child = new ConcatRope(
574-
left,
575-
right,
576-
rope.getEncoding(),
577-
commonCodeRange(left.getCodeRange(), right.getCodeRange()),
578-
depth(left, right),
579-
isBalanced(left, right));
580-
581-
nextLevelQueue.add(child);
582-
}
583-
584-
if (currentRopeQueue.isEmpty() && !nextLevelQueue.isEmpty()) {
585-
currentRopeQueue = nextLevelQueue;
586-
nextLevelQueue = new ArrayDeque<>();
587-
}
588-
}
589-
590-
return root;
591-
}
592-
593-
@TruffleBoundary
594-
private void linearizeTree(ManagedRope rope, Deque<ManagedRope> ropeQueue) {
595-
if (rope instanceof ConcatRope) {
596-
final ConcatRope concatRope = (ConcatRope) rope;
597-
598-
// If a rope is known to be balanced, there's no need to rebalance it.
599-
if (concatRope.isBalanced()) {
600-
ropeQueue.add(concatRope);
601-
} else {
602-
linearizeTree(concatRope.getLeft(), ropeQueue);
603-
linearizeTree(concatRope.getRight(), ropeQueue);
604-
}
605-
} else {
606-
// We never rebalance non-ConcatRopes since that requires per-rope type logic with likely minimal benefit.
607-
ropeQueue.add(rope);
608-
}
609-
}
610511

611512
@SuppressFBWarnings("RV")
612513
@Specialization(guards = { "!left.isEmpty()", "!right.isEmpty()", "isCodeRangeBroken(left, right)" })

src/main/java/org/truffleruby/core/rope/RopeOperations.java

Lines changed: 107 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,36 @@ public static byte[] flattenBytes(Rope rope) {
497497
return buffer;
498498
}
499499

500+
/** Used to implement {@link Rope#getByteSlow(int)} in a non-recursive fashion for some Rope subclasses. Do not call
501+
* directly, call {@link Rope#getByteSlow(int) instead.} */
502+
@TruffleBoundary
503+
static byte getByteSlow(Rope rope, int index) {
504+
while (true) {
505+
final byte[] rawBytes = rope.getRawBytes();
506+
if (rawBytes != null) {
507+
return rawBytes[index];
508+
} else if (rope instanceof ConcatRope) {
509+
final ConcatRope concatRope = (ConcatRope) rope;
510+
final Rope left = concatRope.getLeft();
511+
if (index < left.byteLength()) {
512+
rope = left;
513+
} else {
514+
rope = concatRope.getRight();
515+
index -= left.byteLength();
516+
}
517+
} else if (rope instanceof SubstringRope) {
518+
final SubstringRope substringRope = (SubstringRope) rope;
519+
rope = substringRope.getChild();
520+
index += substringRope.getByteOffset();
521+
} else if (rope instanceof RepeatingRope) {
522+
rope = ((RepeatingRope) rope).getChild();
523+
index %= rope.byteLength();
524+
} else {
525+
return rope.getByteSlow(index);
526+
}
527+
}
528+
}
529+
500530
private static int computeLoopCount(int offset, int times, int length, int patternLength) {
501531
// The loopCount has to be precisely determined so every repetition has at least some parts used.
502532
// It has to account for the beginning we don't need (offset), has to reach the end but, and must not
@@ -508,74 +538,92 @@ private static int computeLoopCount(int offset, int times, int length, int patte
508538

509539
@TruffleBoundary
510540
public static int hashForRange(Rope rope, int startingHashCode, int offset, int length) {
511-
if (rope instanceof LeafRope) {
512-
return Hashing.stringHash(rope.getBytes(), startingHashCode, offset, length);
513-
} else if (rope instanceof SubstringRope) {
514-
final SubstringRope substringRope = (SubstringRope) rope;
515-
516-
return hashForRange(
517-
substringRope.getChild(),
518-
startingHashCode,
519-
offset + substringRope.getByteOffset(),
520-
length);
521-
} else if (rope instanceof ConcatRope) {
522-
final ConcatRope concatRope = (ConcatRope) rope;
523-
final Rope left = concatRope.getLeft();
524-
final Rope right = concatRope.getRight();
525-
526-
int hash = startingHashCode;
527-
final int leftLength = left.byteLength();
541+
class Params {
542+
final Rope rope;
543+
final int startingHashCode;
544+
final int offset;
545+
final int length;
546+
final boolean readResult;
547+
final int repeatCount;
548+
549+
Params(Rope rope, int startingHashCode, int offset, int length, boolean readResult, int repeatCount) {
550+
this.rope = rope;
551+
this.startingHashCode = startingHashCode;
552+
this.offset = offset;
553+
this.length = length;
554+
this.readResult = readResult;
555+
this.repeatCount = repeatCount;
556+
}
557+
}
528558

529-
if (offset < leftLength) {
530-
// The left branch might not be large enough to extract the full hash code we want. In that case,
531-
// we'll extract what we can and extract the difference from the right side.
532-
if (offset + length > leftLength) {
533-
final int coveredByLeft = leftLength - offset;
534-
hash = hashForRange(left, hash, offset, coveredByLeft);
535-
hash = hashForRange(right, hash, 0, length - coveredByLeft);
559+
final Deque<Params> workStack = new ArrayDeque<>();
560+
workStack.push(new Params(rope, startingHashCode, offset, length, false, -1));
561+
int resultHash = 0;
536562

537-
return hash;
563+
while (!workStack.isEmpty()) {
564+
final Params params = workStack.pop();
565+
rope = params.rope;
566+
startingHashCode = params.readResult ? resultHash : params.startingHashCode;
567+
offset = params.offset;
568+
length = params.length;
569+
final byte[] bytes = rope.getRawBytes();
570+
571+
if (bytes != null) {
572+
resultHash = Hashing.stringHash(bytes, startingHashCode, offset, length);
573+
} else if (rope instanceof SubstringRope) {
574+
final SubstringRope substringRope = (SubstringRope) rope;
575+
workStack.push(
576+
new Params(
577+
substringRope.getChild(),
578+
startingHashCode,
579+
offset + substringRope.getByteOffset(),
580+
length,
581+
false,
582+
-1));
583+
} else if (rope instanceof ConcatRope) {
584+
final ConcatRope concatRope = (ConcatRope) rope;
585+
final Rope left = concatRope.getLeft();
586+
final Rope right = concatRope.getRight();
587+
final int leftLength = left.byteLength();
588+
589+
if (offset >= leftLength) {
590+
// range fully contained in right child
591+
workStack.push(new Params(right, startingHashCode, offset - leftLength, length, false, -1));
592+
} else if (offset + length <= leftLength) {
593+
// range fully contained in left child
594+
workStack.push(new Params(left, startingHashCode, offset, length, false, -1));
538595
} else {
539-
return hashForRange(left, hash, offset, length);
596+
final int coveredByLeft = leftLength - offset;
597+
// push right node first, starting hash is the result from the left node
598+
workStack.push(new Params(right, 0, 0, length - coveredByLeft, true, -1));
599+
workStack.push(new Params(left, startingHashCode, offset, coveredByLeft, false, -1));
540600
}
541-
}
542-
543-
return hashForRange(right, hash, offset - leftLength, length);
544-
} else if (rope instanceof RepeatingRope) {
545-
final RepeatingRope repeatingRope = (RepeatingRope) rope;
546-
final Rope child = repeatingRope.getChild();
547-
548-
int bytesToHash = length;
549-
final int patternLength = child.byteLength();
550-
551-
// Fix the offset to be appropriate for a given child. The offset is reset the first time it is
552-
// consumed, so there's no need to worry about adversely affecting anything by adjusting it here.
553-
offset %= child.byteLength();
601+
} else if (rope instanceof RepeatingRope) {
602+
final RepeatingRope repeatingRope = (RepeatingRope) rope;
603+
final Rope child = repeatingRope.getChild();
604+
final int patternLength = child.byteLength();
554605

555-
final int loopCount = computeLoopCount(offset, repeatingRope.getTimes(), bytesToHash, patternLength);
606+
int count = params.repeatCount;
607+
assert count != 0;
608+
if (count < 0) {
609+
offset %= patternLength; // the offset is always 0 when count > 0
610+
count = computeLoopCount(offset, repeatingRope.getTimes(), length, patternLength);
611+
}
556612

557-
int hash = startingHashCode;
558-
for (int i = 0; i < loopCount; i++) {
559-
final int bytesToHashThisPass = bytesToHash + offset > patternLength
613+
if (count > 1) {
614+
// loop - 1 iteration, reset offset to 0, starting hash is the result from previous iteration
615+
workStack.push(new Params(rope, 0, 0, length - patternLength, true, count - 1));
616+
}
617+
// one iteration
618+
final int bytesToHashThisPass = length + offset > patternLength
560619
? patternLength - offset
561-
: bytesToHash;
562-
hash = hashForRange(
563-
child,
564-
hash,
565-
offset,
566-
bytesToHashThisPass);
567-
offset = 0;
568-
bytesToHash -= bytesToHashThisPass;
620+
: length;
621+
workStack.push(new Params(child, startingHashCode, offset, bytesToHashThisPass, false, -1));
622+
} else {
623+
resultHash = Hashing.stringHash(rope.getBytes(), startingHashCode, offset, length);
569624
}
570-
571-
return hash;
572-
} else if (rope instanceof LazyIntRope) {
573-
return Hashing.stringHash(rope.getBytes(), startingHashCode, offset, length);
574-
} else if (rope instanceof NativeRope) {
575-
return Hashing.stringHash(rope.getBytes(), startingHashCode, offset, length);
576-
} else {
577-
throw new RuntimeException("Hash code not supported for rope of type: " + rope.getClass().getName());
578625
}
626+
return resultHash;
579627
}
580628

581629
public static RopeBuilder toRopeBuilderCopy(Rope rope) {

src/main/java/org/truffleruby/core/rope/SubstringRope.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,6 @@ protected byte[] getBytesSlow() {
8080
return super.getBytesSlow();
8181
}
8282

83-
@Override
84-
public byte getByteSlow(int index) {
85-
return child.getByteSlow(index + byteOffset);
86-
}
87-
8883
public ManagedRope getChild() {
8984
return child;
9085
}

0 commit comments

Comments
 (0)