Skip to content

Commit 6e41f81

Browse files
committed
Add a JSON output option for regexp instrumentation.
1 parent 153a878 commit 6e41f81

File tree

6 files changed

+244
-22
lines changed

6 files changed

+244
-22
lines changed

src/main/java/org/truffleruby/core/regexp/RegexpCacheKey.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,18 @@ public RegexpCacheKey(Rope rope, RubyEncoding encoding, RegexpOptions options, H
3030
this.hashing = hashing;
3131
}
3232

33+
public Rope getRope() {
34+
return rope;
35+
}
36+
37+
public RubyEncoding getEncoding() {
38+
return encoding;
39+
}
40+
41+
public int getJoniOptions() {
42+
return joniOptions;
43+
}
44+
3345
@Override
3446
public int hashCode() {
3547
return hashing.hash(rope.hashCode());

src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java

Lines changed: 173 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.util.Map;
1414
import java.util.Map.Entry;
1515
import java.util.Objects;
16+
import java.util.Optional;
1617
import java.util.Set;
1718
import java.util.concurrent.ConcurrentHashMap;
1819
import java.util.concurrent.atomic.AtomicInteger;
@@ -28,7 +29,6 @@
2829
import com.oracle.truffle.api.profiles.IntValueProfile;
2930
import com.oracle.truffle.api.profiles.LoopConditionProfile;
3031
import org.graalvm.collections.Pair;
31-
import org.jcodings.Encoding;
3232
import org.jcodings.specific.ASCIIEncoding;
3333
import org.jcodings.specific.USASCIIEncoding;
3434
import org.jcodings.specific.UTF8Encoding;
@@ -50,6 +50,9 @@
5050
import org.truffleruby.core.array.RubyArray;
5151
import org.truffleruby.core.encoding.Encodings;
5252
import org.truffleruby.core.encoding.RubyEncoding;
53+
import org.truffleruby.core.hash.HashOperations;
54+
import org.truffleruby.core.hash.RubyHash;
55+
import org.truffleruby.core.hash.library.HashStoreLibrary;
5356
import org.truffleruby.core.kernel.KernelNodes.SameOrEqualNode;
5457
import org.truffleruby.core.regexp.RegexpNodes.ToSNode;
5558
import org.truffleruby.core.regexp.TruffleRegexpNodesFactory.MatchNodeGen;
@@ -439,6 +442,175 @@ protected Object buildUnusedRegexpsArray(
439442
}
440443
}
441444

445+
@CoreMethod(names = "compiled_regexp_hash_array", onSingleton = true, required = 0)
446+
public abstract static class CompiledRegexpHashArray extends CoreMethodArrayArgumentsNode {
447+
448+
@Specialization
449+
protected Object buildInfoArray(
450+
@Cached ArrayBuilderNode arrayBuilderNode,
451+
@CachedLibrary(limit = "3") HashStoreLibrary hashStoreLibrary) {
452+
final Set<RegexpCacheKey> compiledRegexps = new HashSet<>();
453+
compiledRegexps.addAll(COMPILED_REGEXPS_DYNAMIC.keySet());
454+
compiledRegexps.addAll(COMPILED_REGEXPS_LITERAL.keySet());
455+
456+
final Set<RegexpCacheKey> matchedRegexps = new HashSet<>();
457+
matchedRegexps.addAll(
458+
MATCHED_REGEXPS_JONI
459+
.keySet()
460+
.stream()
461+
.map(matchInfo -> matchInfo.regexpInfo)
462+
.collect(Collectors.toSet()));
463+
matchedRegexps.addAll(
464+
MATCHED_REGEXPS_TREGEX
465+
.keySet()
466+
.stream()
467+
.map(matchInfo -> matchInfo.regexpInfo)
468+
.collect(Collectors.toSet()));
469+
470+
final int arraySize = COMPILED_REGEXPS_LITERAL.size() + COMPILED_REGEXPS_DYNAMIC.size();
471+
final BuilderState state = arrayBuilderNode.start(arraySize);
472+
473+
processGroup(COMPILED_REGEXPS_LITERAL, matchedRegexps, true, hashStoreLibrary, arrayBuilderNode, state, 0);
474+
processGroup(
475+
COMPILED_REGEXPS_DYNAMIC,
476+
matchedRegexps,
477+
false,
478+
hashStoreLibrary,
479+
arrayBuilderNode,
480+
state,
481+
COMPILED_REGEXPS_LITERAL.size());
482+
483+
return createArray(arrayBuilderNode.finish(state, arraySize), arraySize);
484+
}
485+
486+
private void processGroup(ConcurrentHashMap<RegexpCacheKey, AtomicInteger> group,
487+
Set<RegexpCacheKey> matchedRegexps,
488+
boolean isRegexpLiteral,
489+
HashStoreLibrary hashStoreLibrary,
490+
ArrayBuilderNode arrayBuilderNode, BuilderState state, int offset) {
491+
int n = 0;
492+
for (Entry<RegexpCacheKey, AtomicInteger> entry : group.entrySet()) {
493+
arrayBuilderNode
494+
.appendValue(
495+
state,
496+
offset + n,
497+
buildRegexInfoHash(
498+
getContext(),
499+
getLanguage(),
500+
hashStoreLibrary,
501+
entry.getKey(),
502+
matchedRegexps.contains(entry.getKey()),
503+
Optional.of(isRegexpLiteral),
504+
Optional.of(entry.getValue())));
505+
n++;
506+
}
507+
}
508+
509+
protected static RubyHash buildRegexInfoHash(RubyContext context, RubyLanguage language,
510+
HashStoreLibrary hashStoreLibrary, RegexpCacheKey regexpInfo, boolean isUsed,
511+
Optional<Boolean> isRegexpLiteral,
512+
Optional<AtomicInteger> count) {
513+
final RubyHash hash = HashOperations.newEmptyHash(context, language);
514+
515+
hashStoreLibrary.set(
516+
hash.store,
517+
hash,
518+
language.getSymbol("value"),
519+
StringOperations.createUTF8String(context, language, regexpInfo.getRope()),
520+
true);
521+
522+
if (count.isPresent()) {
523+
hashStoreLibrary.set(hash.store, hash, language.getSymbol("count"), count.get().get(), true);
524+
}
525+
526+
if (isRegexpLiteral.isPresent()) {
527+
hashStoreLibrary.set(hash.store, hash, language.getSymbol("isLiteral"), isRegexpLiteral.get(), true);
528+
}
529+
530+
hashStoreLibrary.set(hash.store, hash, language.getSymbol("isUsed"), isUsed, true);
531+
hashStoreLibrary.set(hash.store, hash, language.getSymbol("encoding"), regexpInfo.getEncoding(), true);
532+
hashStoreLibrary.set(
533+
hash.store,
534+
hash,
535+
language.getSymbol("options"),
536+
RegexpOptions.fromJoniOptions(regexpInfo.getJoniOptions()).toOptions(),
537+
true);
538+
539+
assert hashStoreLibrary.verify(hash.store, hash);
540+
541+
return hash;
542+
}
543+
}
544+
545+
@CoreMethod(names = "matched_regexp_hash_array", onSingleton = true, required = 0)
546+
public abstract static class MatchedRegexpHashArray extends CoreMethodArrayArgumentsNode {
547+
548+
@Specialization
549+
protected Object buildInfoArray(
550+
@Cached ArrayBuilderNode arrayBuilderNode,
551+
@CachedLibrary(limit = "3") HashStoreLibrary hashStoreLibrary) {
552+
final int arraySize = (MATCHED_REGEXPS_JONI.size() + MATCHED_REGEXPS_TREGEX.size());
553+
554+
final BuilderState state = arrayBuilderNode.start(arraySize);
555+
556+
processGroup(MATCHED_REGEXPS_JONI, false, hashStoreLibrary, arrayBuilderNode, state, 0);
557+
processGroup(
558+
MATCHED_REGEXPS_TREGEX,
559+
true,
560+
hashStoreLibrary,
561+
arrayBuilderNode,
562+
state,
563+
MATCHED_REGEXPS_JONI.size());
564+
565+
return createArray(arrayBuilderNode.finish(state, arraySize), arraySize);
566+
}
567+
568+
private void processGroup(ConcurrentHashMap<MatchInfo, AtomicInteger> group,
569+
boolean isTRegexMatch,
570+
HashStoreLibrary hashStoreLibrary,
571+
ArrayBuilderNode arrayBuilderNode, BuilderState state, int offset) {
572+
int n = 0;
573+
for (Entry<MatchInfo, AtomicInteger> entry : group.entrySet()) {
574+
arrayBuilderNode
575+
.appendValue(
576+
state,
577+
offset + n,
578+
buildHash(hashStoreLibrary, isTRegexMatch, entry.getKey(), entry.getValue()));
579+
n++;
580+
}
581+
}
582+
583+
private RubyHash buildHash(HashStoreLibrary hashStoreLibrary, boolean isTRegexMatch, MatchInfo matchInfo,
584+
AtomicInteger count) {
585+
final RubyHash regexpInfoHash = CompiledRegexpHashArray.buildRegexInfoHash(
586+
getContext(),
587+
getLanguage(),
588+
hashStoreLibrary,
589+
matchInfo.regexpInfo,
590+
true,
591+
Optional.empty(),
592+
Optional.empty());
593+
final RubyHash matchInfoHash = HashOperations.newEmptyHash(getContext(), getLanguage());
594+
595+
hashStoreLibrary
596+
.set(matchInfoHash.store, matchInfoHash, getLanguage().getSymbol("regexp"), regexpInfoHash, true);
597+
hashStoreLibrary
598+
.set(matchInfoHash.store, matchInfoHash, getLanguage().getSymbol("count"), count.get(), true);
599+
hashStoreLibrary
600+
.set(matchInfoHash.store, matchInfoHash, getLanguage().getSymbol("isTRegex"), isTRegexMatch, true);
601+
hashStoreLibrary.set(
602+
matchInfoHash.store,
603+
matchInfoHash,
604+
getLanguage().getSymbol("fromStart"),
605+
matchInfo.matchStart,
606+
true);
607+
608+
assert hashStoreLibrary.verify(matchInfoHash.store, matchInfoHash);
609+
610+
return matchInfoHash;
611+
}
612+
}
613+
442614
@Primitive(name = "regexp_initialized?")
443615
public abstract static class InitializedNode extends CoreMethodArrayArgumentsNode {
444616

src/main/java/org/truffleruby/options/Options.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ public class Options {
193193
public final boolean METHODMISSING_ALWAYS_INLINE;
194194
/** --regexp-instrument-match=false */
195195
public final boolean REGEXP_INSTRUMENT_MATCH;
196+
/** --regexp-instrumentation-output-format="text" */
197+
public final String REGEXP_INSTRUMENT_OUTPUT_FORMAT;
196198
/** --metrics-time-parsing-file=false */
197199
public final boolean METRICS_TIME_PARSING_FILE;
198200
/** --metrics-time-require=false */
@@ -288,6 +290,7 @@ public Options(Env env, OptionValues options, LanguageOptions languageOptions) {
288290
METHODMISSING_ALWAYS_CLONE = options.hasBeenSet(OptionsCatalog.METHODMISSING_ALWAYS_CLONE_KEY) ? options.get(OptionsCatalog.METHODMISSING_ALWAYS_CLONE_KEY) : CLONE_DEFAULT;
289291
METHODMISSING_ALWAYS_INLINE = options.hasBeenSet(OptionsCatalog.METHODMISSING_ALWAYS_INLINE_KEY) ? options.get(OptionsCatalog.METHODMISSING_ALWAYS_INLINE_KEY) : INLINE_DEFAULT;
290292
REGEXP_INSTRUMENT_MATCH = options.get(OptionsCatalog.REGEXP_INSTRUMENT_MATCH_KEY);
293+
REGEXP_INSTRUMENT_OUTPUT_FORMAT = options.get(OptionsCatalog.REGEXP_INSTRUMENT_OUTPUT_FORMAT_KEY);
291294
METRICS_TIME_PARSING_FILE = options.get(OptionsCatalog.METRICS_TIME_PARSING_FILE_KEY);
292295
METRICS_TIME_REQUIRE = options.get(OptionsCatalog.METRICS_TIME_REQUIRE_KEY);
293296
TESTING_RUBYGEMS = options.get(OptionsCatalog.TESTING_RUBYGEMS_KEY);
@@ -466,6 +469,8 @@ public Object fromDescriptor(OptionDescriptor descriptor) {
466469
return METHODMISSING_ALWAYS_INLINE;
467470
case "ruby.regexp-instrument-match":
468471
return REGEXP_INSTRUMENT_MATCH;
472+
case "ruby.regexp-instrumentation-output-format":
473+
return REGEXP_INSTRUMENT_OUTPUT_FORMAT;
469474
case "ruby.metrics-time-parsing-file":
470475
return METRICS_TIME_PARSING_FILE;
471476
case "ruby.metrics-time-require":

src/main/ruby/truffleruby/core/truffle/regexp_operations.rb

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -171,33 +171,54 @@ def self.match_stats_tregex
171171
end
172172

173173
# Prints the collected regexp instrumentation data in the format selected by the
# 'regexp-instrumentation-output-format' option: a plain-text report for 'text', a pretty-printed
# JSON document for 'json'. Any other value raises.
def self.print_stats
  output_format = Truffle::Boot.get_option('regexp-instrumentation-output-format')
  creation_enabled = Truffle::Boot.get_option('regexp-instrument-creation')
  match_enabled = Truffle::Boot.get_option('regexp-instrument-match')

  case output_format
  when 'text'
    puts '--------------------'
    puts 'Regular expression statistics'
    puts '--------------------'

    if creation_enabled
      puts ' Compilation (Literal)'
      print_stats_table compilation_stats_literal_regexp
      puts ' --------------------'
      puts ' Compilation (Dynamic)'
      print_stats_table compilation_stats_dynamic_regexp
      puts ' --------------------'
    end

    if match_enabled
      puts ' Matches (Joni)'
      print_stats_table match_stats_joni
      puts ' --------------------'
      puts ' Matches (TRegex)'
      print_stats_table match_stats_tregex

      # The unused-regexp list is only printed when both creation and match instrumentation are enabled.
      if creation_enabled
        puts ' --------------------'
        puts ' Unused Regexps'
        puts unused_regexps_array.map { |regexp| " #{regexp}" }
      end

      puts '--------------------'
    end
  when 'json'
    report = {}
    report[:regexps] = compiled_regexp_hash_array if creation_enabled
    report[:matches] = matched_regexp_hash_array if match_enabled

    # The values must be collected before requiring the JSON library, since the JSON library will load new regexps
    # to support its own operations. We don't want those to pollute the application profile.
    require 'json'
    puts JSON.pretty_generate(report)
  else
    raise "Unsupported regexp output format: #{output_format}"
  end
end
203224

src/options.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ INTERNAL: # Options for debugging the TruffleRuby implementation
236236
# Instrumentation to debug performance
237237
REGEXP_INSTRUMENT_CREATION: [regexp-instrument-creation, boolean, false, Enable instrumentation to gather stats on regexp creation]
238238
REGEXP_INSTRUMENT_MATCH: [regexp-instrument-match, boolean, false, Enable instrumentation to gather stats on regexp matching]
239+
# The description is copied into a Java string literal, so quotes need exactly one level of escaping.
REGEXP_INSTRUMENT_OUTPUT_FORMAT: [regexp-instrumentation-output-format, string, 'text', 'Output format for regexp instrumentation (\"text\" or \"json\")']
239240

240241
# Options for metrics, the output cannot be used directly without processing
241242
METRICS_TIME_PARSING_FILE: [metrics-time-parsing-file, boolean, false, 'Measure time for parsing, translating and executing files, per file']

src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ public class OptionsCatalog {
151151
public static final OptionKey<Boolean> METHODMISSING_ALWAYS_INLINE_KEY = new OptionKey<>(INLINE_DEFAULT_KEY.getDefaultValue());
152152
public static final OptionKey<Boolean> REGEXP_INSTRUMENT_CREATION_KEY = new OptionKey<>(false);
153153
public static final OptionKey<Boolean> REGEXP_INSTRUMENT_MATCH_KEY = new OptionKey<>(false);
154+
public static final OptionKey<String> REGEXP_INSTRUMENT_OUTPUT_FORMAT_KEY = new OptionKey<>("text");
154155
public static final OptionKey<Boolean> METRICS_TIME_PARSING_FILE_KEY = new OptionKey<>(false);
155156
public static final OptionKey<Boolean> METRICS_TIME_REQUIRE_KEY = new OptionKey<>(false);
156157
public static final OptionKey<Boolean> SHARED_OBJECTS_ENABLED_KEY = new OptionKey<>(true);
@@ -1078,6 +1079,13 @@ public class OptionsCatalog {
10781079
.stability(OptionStability.EXPERIMENTAL)
10791080
.build();
10801081

1082+
public static final OptionDescriptor REGEXP_INSTRUMENT_OUTPUT_FORMAT = OptionDescriptor
1083+
.newBuilder(REGEXP_INSTRUMENT_OUTPUT_FORMAT_KEY, "ruby.regexp-instrumentation-output-format")
1084+
.help("Output format for regexp instrumentation (\\\"text\\\" or \\\"json\\\")")
1085+
.category(OptionCategory.INTERNAL)
1086+
.stability(OptionStability.EXPERIMENTAL)
1087+
.build();
1088+
10811089
public static final OptionDescriptor METRICS_TIME_PARSING_FILE = OptionDescriptor
10821090
.newBuilder(METRICS_TIME_PARSING_FILE_KEY, "ruby.metrics-time-parsing-file")
10831091
.help("Measure time for parsing, translating and executing files, per file")
@@ -1405,6 +1413,8 @@ public static OptionDescriptor fromName(String name) {
14051413
return REGEXP_INSTRUMENT_CREATION;
14061414
case "ruby.regexp-instrument-match":
14071415
return REGEXP_INSTRUMENT_MATCH;
1416+
case "ruby.regexp-instrumentation-output-format":
1417+
return REGEXP_INSTRUMENT_OUTPUT_FORMAT;
14081418
case "ruby.metrics-time-parsing-file":
14091419
return METRICS_TIME_PARSING_FILE;
14101420
case "ruby.metrics-time-require":
@@ -1561,6 +1571,7 @@ public static OptionDescriptor[] allDescriptors() {
15611571
METHODMISSING_ALWAYS_INLINE,
15621572
REGEXP_INSTRUMENT_CREATION,
15631573
REGEXP_INSTRUMENT_MATCH,
1574+
REGEXP_INSTRUMENT_OUTPUT_FORMAT,
15641575
METRICS_TIME_PARSING_FILE,
15651576
METRICS_TIME_REQUIRE,
15661577
SHARED_OBJECTS_ENABLED,

0 commit comments

Comments
 (0)