Skip to content

Commit 153a878

Browse files
committed
Print out the match string's encoding in instrumented output.
Also, fix the `RegexpCacheKey` built in `instrumentMatch` to use the encoding of the regexp rather than the encoding of the string being matched against. The encoding of the string being matched against is now stored in `MatchInfo` instead, so matches that share the same regexp instance but have different match string encodings get separate entries. That's ultimately what we want in order to gain better insight into how regexps are being used in the application.
1 parent 5d0863e commit 153a878

File tree

1 file changed

+14
-6
lines changed

1 file changed

+14
-6
lines changed

src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
import com.oracle.truffle.api.profiles.IntValueProfile;
2929
import com.oracle.truffle.api.profiles.LoopConditionProfile;
3030
import org.graalvm.collections.Pair;
31+
import org.jcodings.Encoding;
3132
import org.jcodings.specific.ASCIIEncoding;
3233
import org.jcodings.specific.USASCIIEncoding;
3334
import org.jcodings.specific.UTF8Encoding;
@@ -86,11 +87,11 @@ public class TruffleRegexpNodes {
8687
private static void instrumentMatch(ConcurrentHashMap<MatchInfo, AtomicInteger> metricsMap, RubyRegexp regexp,
8788
Object string, boolean fromStart) {
8889
Rope source = regexp.source;
89-
RubyEncoding enc = RubyStringLibrary.getUncached().getEncoding(string);
9090
RegexpOptions options = regexp.options;
9191
TruffleRegexpNodes.MatchInfo matchInfo = new TruffleRegexpNodes.MatchInfo(
92-
new RegexpCacheKey(source, enc, options, Hashing.NO_SEED),
93-
fromStart);
92+
new RegexpCacheKey(source, regexp.encoding, options, Hashing.NO_SEED),
93+
fromStart,
94+
RubyStringLibrary.getUncached().getEncoding(string));
9495
ConcurrentOperations.getOrCompute(metricsMap, matchInfo, x -> new AtomicInteger()).incrementAndGet();
9596
}
9697

@@ -737,11 +738,13 @@ static final class MatchInfo {
737738

738739
private final RegexpCacheKey regexpInfo;
739740
private final boolean matchStart;
741+
private final RubyEncoding matchEncoding;
740742

741-
MatchInfo(RegexpCacheKey regexpInfo, boolean matchStart) {
743+
MatchInfo(RegexpCacheKey regexpInfo, boolean matchStart, RubyEncoding matchEncoding) {
742744
assert regexpInfo != null;
743745
this.regexpInfo = regexpInfo;
744746
this.matchStart = matchStart;
747+
this.matchEncoding = matchEncoding;
745748
}
746749

747750
@Override
@@ -760,12 +763,17 @@ public boolean equals(Object obj) {
760763
}
761764

762765
MatchInfo other = (MatchInfo) obj;
763-
return matchStart == other.matchStart && regexpInfo.equals(other.regexpInfo);
766+
return matchStart == other.matchStart && matchEncoding == other.matchEncoding &&
767+
regexpInfo.equals(other.regexpInfo);
764768
}
765769

766770
@Override
767771
public String toString() {
768-
return String.format("Match (%s, fromStart = %s)", regexpInfo, matchStart);
772+
return String.format(
773+
"Match (%s, fromStart = %s, encoding = %s)",
774+
regexpInfo,
775+
matchStart,
776+
RopeOperations.decodeOrEscapeBinaryRope(matchEncoding.name.rope));
769777
}
770778
}
771779

0 commit comments

Comments
 (0)