Skip to content

Commit dcb76d1

Browse files
committed
[GR-19220] Replace boundary on StringCharacterIndexPrimitiveNode (#2383)
PullRequest: truffleruby/2782
2 parents 8202731 + 9e8381b commit dcb76d1

File tree

4 files changed

+93
-42
lines changed

4 files changed

+93
-42
lines changed

spec/ruby/core/string/gsub_spec.rb

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -594,6 +594,14 @@ def obj.to_s() "ok" end
594594
end
595595
end
596596

597+
describe "String#gsub with a string pattern" do
598+
it "handles multibyte characters" do
599+
"é".gsub("é", "â").should == "â"
600+
"aé".gsub("é", "â").should == "aâ"
601+
"éa".gsub("é", "â").should == "âa"
602+
end
603+
end
604+
597605
describe "String#gsub! with pattern and replacement" do
598606
it "modifies self in place and returns self" do
599607
a = "hello"

spec/ruby/core/string/index_spec.rb

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,7 @@
146146

147147
it "returns the character index after offset" do
148148
"われわれ".index("わ", 1).should == 2
149+
"ありがとうありがとう".index("が", 3).should == 7
149150
end
150151

151152
it "returns the character index after a partial first match" do

src/main/java/org/truffleruby/core/string/StringNodes.java

Lines changed: 83 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -4398,49 +4398,91 @@ protected int notValidUtf8(Object string, int byteIndex,
43984398
}
43994399
}
44004400

4401+
/** Search pattern in string starting after offset characters, and return a character index or nil */
44014402
@Primitive(name = "string_character_index", lowerFixnum = 2)
4402-
public abstract static class StringCharacterIndexPrimitiveNode extends PrimitiveArrayArgumentsNode {
4403+
@NodeChild(value = "string", type = RubyNode.class)
4404+
@NodeChild(value = "pattern", type = RubyNode.class)
4405+
@NodeChild(value = "offset", type = RubyNode.class)
4406+
public abstract static class StringCharacterIndexNode extends PrimitiveNode {
44034407

4404-
@TruffleBoundary
4405-
@Specialization
4406-
protected Object stringCharacterIndex(Object string, Object pattern, int offset,
4407-
@CachedLibrary(limit = "2") RubyStringLibrary stringLibrary,
4408-
@CachedLibrary(limit = "2") RubyStringLibrary patternLibrary,
4409-
@Cached RopeNodes.CalculateCharacterLengthNode calculateCharacterLengthNode) {
4410-
if (offset < 0) {
4411-
return nil;
4412-
}
4408+
@Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create();
44134409

4414-
final Rope stringRope = stringLibrary.getRope(string);
4415-
final Rope patternRope = patternLibrary.getRope(pattern);
4410+
@CreateCast("string")
4411+
protected RubyNode coerceStringToRope(RubyNode string) {
4412+
return ToRopeNodeGen.create(string);
4413+
}
44164414

4417-
final int total = stringRope.byteLength();
4418-
int p = 0;
4419-
final int e = p + total;
4415+
@CreateCast("pattern")
4416+
protected RubyNode coercePatternToRope(RubyNode pattern) {
4417+
return ToRopeNodeGen.create(pattern);
4418+
}
4419+
4420+
@Specialization(
4421+
guards = {
4422+
"offset >= 0",
4423+
"singleByteOptimizableNode.execute(stringRope)",
4424+
"!patternFits(stringRope, patternRope, offset)" })
4425+
protected Object patternTooLarge(Rope stringRope, Rope patternRope, int offset) {
4426+
return nil;
4427+
}
4428+
4429+
@Specialization(
4430+
guards = {
4431+
"offset >= 0",
4432+
"singleByteOptimizableNode.execute(stringRope)",
4433+
"patternFits(stringRope, patternRope, offset)" })
4434+
protected Object singleByteOptimizable(Rope stringRope, Rope patternRope, int offset,
4435+
@Cached RopeNodes.BytesNode stringBytesNode,
4436+
@Cached RopeNodes.BytesNode patternBytesNode,
4437+
@Cached LoopConditionProfile loopProfile,
4438+
@Cached("createCountingProfile()") ConditionProfile matchProfile) {
4439+
4440+
int p = offset;
4441+
final int e = stringRope.byteLength();
44204442
final int pe = patternRope.byteLength();
44214443
final int l = e - pe + 1;
44224444

4423-
final byte[] stringBytes = stringRope.getBytes();
4424-
final byte[] patternBytes = patternRope.getBytes();
4445+
final byte[] stringBytes = stringBytesNode.execute(stringRope);
4446+
final byte[] patternBytes = patternBytesNode.execute(patternRope);
44254447

4426-
if (stringRope.isSingleByteOptimizable()) {
4427-
for (p += offset; p < l; p++) {
4428-
if (ArrayUtils.memcmp(stringBytes, p, patternBytes, 0, pe) == 0) {
4448+
try {
4449+
for (; loopProfile.profile(p < l); p++) {
4450+
if (matchProfile.profile(ArrayUtils.memcmp(stringBytes, p, patternBytes, 0, pe) == 0)) {
44294451
return p;
44304452
}
44314453
}
4432-
4433-
return nil;
4454+
} finally {
4455+
LoopNode.reportLoopCount(this, p - offset);
44344456
}
44354457

4458+
return nil;
4459+
}
4460+
4461+
@TruffleBoundary
4462+
@Specialization(
4463+
guards = {
4464+
"offset >= 0",
4465+
"!singleByteOptimizableNode.execute(stringRope)" })
4466+
protected Object multiByte(Rope stringRope, Rope patternRope, int offset,
4467+
@Cached RopeNodes.CalculateCharacterLengthNode calculateCharacterLengthNode,
4468+
@Cached RopeNodes.BytesNode stringBytesNode,
4469+
@Cached RopeNodes.BytesNode patternBytesNode) {
4470+
4471+
int p = 0;
4472+
final int e = stringRope.byteLength();
4473+
final int pe = patternRope.byteLength();
4474+
final int l = e - pe + 1;
4475+
4476+
final byte[] stringBytes = stringBytesNode.execute(stringRope);
4477+
final byte[] patternBytes = patternBytesNode.execute(patternRope);
4478+
44364479
final Encoding enc = stringRope.getEncoding();
44374480
final CodeRange cr = stringRope.getCodeRange();
4438-
int index = 0;
44394481
int c = 0;
4482+
int index = 0;
44404483

44414484
while (p < e && index < offset) {
44424485
c = calculateCharacterLengthNode.characterLength(enc, cr, Bytes.fromRange(stringBytes, p, e));
4443-
44444486
if (StringSupport.MBCLEN_CHARFOUND_P(c)) {
44454487
p += c;
44464488
index++;
@@ -4461,13 +4503,18 @@ protected Object stringCharacterIndex(Object string, Object pattern, int offset,
44614503

44624504
return nil;
44634505
}
4506+
4507+
protected boolean patternFits(Rope stringRope, Rope patternRope, int offset) {
4508+
return offset + patternRope.byteLength() <= stringRope.byteLength();
4509+
}
44644510
}
44654511

4512+
/** Search pattern in string starting after offset bytes, and return a byte index or nil */
44664513
@Primitive(name = "string_byte_index", lowerFixnum = 2)
44674514
@NodeChild(value = "string", type = RubyNode.class)
44684515
@NodeChild(value = "pattern", type = RubyNode.class)
44694516
@NodeChild(value = "offset", type = RubyNode.class)
4470-
public abstract static class StringByteIndexPrimitiveNode extends PrimitiveNode {
4517+
public abstract static class StringByteIndexNode extends PrimitiveNode {
44714518

44724519
@Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create();
44734520

@@ -4481,26 +4528,17 @@ protected RubyNode coercePatternToRope(RubyNode pattern) {
44814528
return ToRopeNodeGen.create(pattern);
44824529
}
44834530

4484-
@Specialization(guards = "offset < 0")
4485-
protected Object stringByteIndexNegativeOffset(Rope stringRope, Rope patternRope, int offset) {
4486-
return nil;
4487-
}
4488-
4489-
@Specialization(
4490-
guards = {
4491-
"offset >= 0",
4492-
"singleByteOptimizableNode.execute(stringRope)",
4493-
"patternRope.byteLength() > stringRope.byteLength()" })
4494-
protected Object stringByteIndexPatternTooLarge(Rope stringRope, Rope patternRope, int offset) {
4531+
@Specialization(guards = { "offset >= 0", "!patternFits(stringRope, patternRope, offset)" })
4532+
protected Object patternTooLarge(Rope stringRope, Rope patternRope, int offset) {
44954533
return nil;
44964534
}
44974535

44984536
@Specialization(
44994537
guards = {
45004538
"offset >= 0",
45014539
"singleByteOptimizableNode.execute(stringRope)",
4502-
"patternRope.byteLength() <= stringRope.byteLength()" })
4503-
protected Object stringCharacterIndexSingleByteOptimizable(Rope stringRope, Rope patternRope, int offset,
4540+
"patternFits(stringRope, patternRope, offset)" })
4541+
protected Object singleByteOptimizable(Rope stringRope, Rope patternRope, int offset,
45044542
@Cached RopeNodes.BytesNode stringBytesNode,
45054543
@Cached RopeNodes.BytesNode patternBytesNode,
45064544
@Cached LoopConditionProfile loopProfile,
@@ -4532,8 +4570,8 @@ protected Object stringCharacterIndexSingleByteOptimizable(Rope stringRope, Rope
45324570
guards = {
45334571
"offset >= 0",
45344572
"!singleByteOptimizableNode.execute(stringRope)",
4535-
"patternRope.byteLength() <= stringRope.byteLength()" })
4536-
protected Object stringCharacterIndex(Rope stringRope, Rope patternRope, int offset,
4573+
"patternFits(stringRope, patternRope, offset)" })
4574+
protected Object multiByte(Rope stringRope, Rope patternRope, int offset,
45374575
@Cached RopeNodes.CalculateCharacterLengthNode calculateCharacterLengthNode,
45384576
@Cached RopeNodes.BytesNode stringBytesNode,
45394577
@Cached RopeNodes.BytesNode patternBytesNode) {
@@ -4548,7 +4586,7 @@ protected Object stringCharacterIndex(Rope stringRope, Rope patternRope, int off
45484586

45494587
final Encoding enc = stringRope.getEncoding();
45504588
final CodeRange cr = stringRope.getCodeRange();
4551-
int c = 0;
4589+
int c;
45524590

45534591
for (; p < l; p += c) {
45544592
c = calculateCharacterLengthNode.characterLength(enc, cr, Bytes.fromRange(stringBytes, p, e));
@@ -4562,6 +4600,10 @@ protected Object stringCharacterIndex(Rope stringRope, Rope patternRope, int off
45624600

45634601
return nil;
45644602
}
4603+
4604+
protected boolean patternFits(Rope stringRope, Rope patternRope, int offset) {
4605+
return offset + patternRope.byteLength() <= stringRope.byteLength();
4606+
}
45654607
}
45664608

45674609
/** Calculates the byte offset of a character, indicated by a character index, starting from a provided byte offset

tool/jt.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2319,7 +2319,7 @@ def command_format(changed_java_files = nil)
23192319
format_specializations_check
23202320
end
23212321

2322-
private def check_filename_length
2322+
def check_filename_length
23232323
# For eCryptfs, see https://bugs.launchpad.net/ecryptfs/+bug/344878
23242324
max_length = 143
23252325

0 commit comments

Comments
 (0)