17
17
import java .util .Set ;
18
18
import java .util .concurrent .ConcurrentHashMap ;
19
19
import java .util .concurrent .atomic .AtomicInteger ;
20
+ import java .util .concurrent .atomic .AtomicLong ;
21
+ import java .util .function .Function ;
20
22
import java .util .stream .Collectors ;
21
23
22
24
import com .oracle .truffle .api .CompilerDirectives ;
@@ -88,14 +90,21 @@ public class TruffleRegexpNodes {
88
90
89
91
@ TruffleBoundary
90
92
private static void instrumentMatch (ConcurrentHashMap <MatchInfo , AtomicInteger > metricsMap , RubyRegexp regexp ,
91
- Object string , boolean fromStart ) {
93
+ Object string , boolean fromStart , boolean collectDetailedStats ) {
92
94
Rope source = regexp .source ;
93
95
RegexpOptions options = regexp .options ;
94
96
TruffleRegexpNodes .MatchInfo matchInfo = new TruffleRegexpNodes .MatchInfo (
95
97
new RegexpCacheKey (source , regexp .encoding , options , Hashing .NO_SEED ),
96
- fromStart ,
97
- RubyStringLibrary .getUncached ().getEncoding (string ));
98
+ fromStart );
98
99
ConcurrentOperations .getOrCompute (metricsMap , matchInfo , x -> new AtomicInteger ()).incrementAndGet ();
100
+
101
+ if (collectDetailedStats ) {
102
+ final MatchInfoStats stats = ConcurrentOperations
103
+ .getOrCompute (MATCHED_REGEXP_STATS , matchInfo , x -> new MatchInfoStats ());
104
+ stats .record (
105
+ RubyStringLibrary .getUncached ().getRope (string ),
106
+ RubyStringLibrary .getUncached ().getEncoding (string ));
107
+ }
99
108
}
100
109
101
110
// rb_reg_prepare_enc ... mostly. Some of the error checks are performed by callers of this method.
@@ -361,6 +370,36 @@ protected <T> RubyArray fillinInstrumentData(Map<T, AtomicInteger> map, ArrayBui
361
370
}
362
371
return createArray (arrayBuilderNode .finish (state , n ), n );
363
372
}
373
+
374
+ @ TruffleBoundary
375
+ protected static Set <RegexpCacheKey > allCompiledRegexps () {
376
+ final Set <RegexpCacheKey > ret = new HashSet <>();
377
+
378
+ ret .addAll (COMPILED_REGEXPS_DYNAMIC .keySet ());
379
+ ret .addAll (COMPILED_REGEXPS_LITERAL .keySet ());
380
+
381
+ return ret ;
382
+ }
383
+
384
+ @ TruffleBoundary
385
+ protected static Set <RegexpCacheKey > allMatchedRegexps () {
386
+ final Set <RegexpCacheKey > ret = new HashSet <>();
387
+
388
+ ret .addAll (
389
+ MATCHED_REGEXPS_JONI
390
+ .keySet ()
391
+ .stream ()
392
+ .map (matchInfo -> matchInfo .regexpInfo )
393
+ .collect (Collectors .toSet ()));
394
+ ret .addAll (
395
+ MATCHED_REGEXPS_TREGEX
396
+ .keySet ()
397
+ .stream ()
398
+ .map (matchInfo -> matchInfo .regexpInfo )
399
+ .collect (Collectors .toSet ()));
400
+
401
+ return ret ;
402
+ }
364
403
}
365
404
366
405
@ CoreMethod (names = "regexp_compilation_stats_array" , onSingleton = true , required = 1 )
@@ -390,28 +429,14 @@ protected Object buildStatsArray(boolean joniMatches,
390
429
}
391
430
392
431
@ CoreMethod (names = "unused_regexps_array" , onSingleton = true , required = 0 )
393
- public abstract static class UnusedRegexpsArray extends CoreMethodArrayArgumentsNode {
432
+ public abstract static class UnusedRegexpsArray extends RegexpStatsNode {
394
433
434
+ @ TruffleBoundary
395
435
@ Specialization
396
436
protected Object buildUnusedRegexpsArray (
397
437
@ Cached ArrayBuilderNode arrayBuilderNode ) {
398
- final Set <RegexpCacheKey > compiledRegexps = new HashSet <>();
399
- compiledRegexps .addAll (COMPILED_REGEXPS_DYNAMIC .keySet ());
400
- compiledRegexps .addAll (COMPILED_REGEXPS_LITERAL .keySet ());
401
-
402
- final Set <RegexpCacheKey > matchedRegexps = new HashSet <>();
403
- matchedRegexps .addAll (
404
- MATCHED_REGEXPS_JONI
405
- .keySet ()
406
- .stream ()
407
- .map (matchInfo -> matchInfo .regexpInfo )
408
- .collect (Collectors .toSet ()));
409
- matchedRegexps .addAll (
410
- MATCHED_REGEXPS_TREGEX
411
- .keySet ()
412
- .stream ()
413
- .map (matchInfo -> matchInfo .regexpInfo )
414
- .collect (Collectors .toSet ()));
438
+ final Set <RegexpCacheKey > compiledRegexps = allCompiledRegexps ();
439
+ final Set <RegexpCacheKey > matchedRegexps = allMatchedRegexps ();
415
440
416
441
final Set <RegexpCacheKey > unusedRegexps = new HashSet <>(compiledRegexps );
417
442
unusedRegexps .removeAll (matchedRegexps );
@@ -429,29 +454,14 @@ protected Object buildUnusedRegexpsArray(
429
454
}
430
455
431
456
@ CoreMethod (names = "compiled_regexp_hash_array" , onSingleton = true , required = 0 )
432
- public abstract static class CompiledRegexpHashArray extends CoreMethodArrayArgumentsNode {
457
+ public abstract static class CompiledRegexpHashArray extends RegexpStatsNode {
433
458
459
+ @ TruffleBoundary
434
460
@ Specialization
435
461
protected Object buildInfoArray (
436
462
@ Cached ArrayBuilderNode arrayBuilderNode ,
437
- @ CachedLibrary (limit = "3" ) HashStoreLibrary hashStoreLibrary ) {
438
- final Set <RegexpCacheKey > compiledRegexps = new HashSet <>();
439
- compiledRegexps .addAll (COMPILED_REGEXPS_DYNAMIC .keySet ());
440
- compiledRegexps .addAll (COMPILED_REGEXPS_LITERAL .keySet ());
441
-
442
- final Set <RegexpCacheKey > matchedRegexps = new HashSet <>();
443
- matchedRegexps .addAll (
444
- MATCHED_REGEXPS_JONI
445
- .keySet ()
446
- .stream ()
447
- .map (matchInfo -> matchInfo .regexpInfo )
448
- .collect (Collectors .toSet ()));
449
- matchedRegexps .addAll (
450
- MATCHED_REGEXPS_TREGEX
451
- .keySet ()
452
- .stream ()
453
- .map (matchInfo -> matchInfo .regexpInfo )
454
- .collect (Collectors .toSet ()));
463
+ @ CachedLibrary (limit = "1" ) HashStoreLibrary hashStoreLibrary ) {
464
+ final Set <RegexpCacheKey > matchedRegexps = allMatchedRegexps ();
455
465
456
466
final int arraySize = COMPILED_REGEXPS_LITERAL .size () + COMPILED_REGEXPS_DYNAMIC .size ();
457
467
final BuilderState state = arrayBuilderNode .start (arraySize );
@@ -513,7 +523,10 @@ protected static RubyHash buildRegexInfoHash(RubyContext context, RubyLanguage l
513
523
hashStoreLibrary .set (hash .store , hash , language .getSymbol ("isLiteral" ), isRegexpLiteral .get (), true );
514
524
}
515
525
516
- hashStoreLibrary .set (hash .store , hash , language .getSymbol ("isUsed" ), isUsed , true );
526
+ if (context .getOptions ().REGEXP_INSTRUMENT_MATCH ) {
527
+ hashStoreLibrary .set (hash .store , hash , language .getSymbol ("isUsed" ), isUsed , true );
528
+ }
529
+
517
530
hashStoreLibrary .set (hash .store , hash , language .getSymbol ("encoding" ), regexpInfo .getEncoding (), true );
518
531
hashStoreLibrary .set (
519
532
hash .store ,
@@ -529,8 +542,9 @@ protected static RubyHash buildRegexInfoHash(RubyContext context, RubyLanguage l
529
542
}
530
543
531
544
@ CoreMethod (names = "matched_regexp_hash_array" , onSingleton = true , required = 0 )
532
- public abstract static class MatchedRegexpHashArray extends CoreMethodArrayArgumentsNode {
545
+ public abstract static class MatchedRegexpHashArray extends RegexpStatsNode {
533
546
547
+ @ TruffleBoundary
534
548
@ Specialization
535
549
protected Object buildInfoArray (
536
550
@ Cached ArrayBuilderNode arrayBuilderNode ,
@@ -561,12 +575,18 @@ private void processGroup(ConcurrentHashMap<MatchInfo, AtomicInteger> group,
561
575
.appendValue (
562
576
state ,
563
577
offset + n ,
564
- buildHash (hashStoreLibrary , isTRegexMatch , entry .getKey (), entry .getValue ()));
578
+ buildHash (
579
+ hashStoreLibrary ,
580
+ arrayBuilderNode ,
581
+ isTRegexMatch ,
582
+ entry .getKey (),
583
+ entry .getValue ()));
565
584
n ++;
566
585
}
567
586
}
568
587
569
- private RubyHash buildHash (HashStoreLibrary hashStoreLibrary , boolean isTRegexMatch , MatchInfo matchInfo ,
588
+ private RubyHash buildHash (HashStoreLibrary hashStoreLibrary , ArrayBuilderNode arrayBuilderNode ,
589
+ boolean isTRegexMatch , MatchInfo matchInfo ,
570
590
AtomicInteger count ) {
571
591
final RubyHash regexpInfoHash = CompiledRegexpHashArray .buildRegexInfoHash (
572
592
getContext (),
@@ -591,10 +611,99 @@ private RubyHash buildHash(HashStoreLibrary hashStoreLibrary, boolean isTRegexMa
591
611
matchInfo .matchStart ,
592
612
true );
593
613
614
+ if (getContext ().getOptions ().REGEXP_INSTRUMENT_MATCH_DETAILED ) {
615
+ hashStoreLibrary .set (
616
+ matchInfoHash .store ,
617
+ matchInfoHash ,
618
+ getLanguage ().getSymbol ("match_stats" ),
619
+ buildMatchInfoStatsHash (hashStoreLibrary , arrayBuilderNode , matchInfo ),
620
+ true );
621
+ }
622
+
594
623
assert hashStoreLibrary .verify (matchInfoHash .store , matchInfoHash );
595
624
596
625
return matchInfoHash ;
597
626
}
627
+
628
+ private RubyHash buildMatchInfoStatsHash (HashStoreLibrary hashStoreLibrary , ArrayBuilderNode arrayBuilderNode ,
629
+ MatchInfo matchInfo ) {
630
+ final MatchInfoStats stats = MATCHED_REGEXP_STATS .get (matchInfo );
631
+ final RubyHash ret = HashOperations .newEmptyHash (getContext (), getLanguage ());
632
+
633
+ buildAndSetDistributionHash (
634
+ hashStoreLibrary ,
635
+ ret ,
636
+ "byte_array_populated" ,
637
+ stats .byteArrayPopulatedFrequencies ,
638
+ Optional .empty (),
639
+ Optional .of (count -> count .get ()));
640
+
641
+ buildAndSetDistributionHash (
642
+ hashStoreLibrary ,
643
+ ret ,
644
+ "byte_lengths" ,
645
+ stats .byteLengthFrequencies ,
646
+ Optional .empty (),
647
+ Optional .of (count -> count .get ()));
648
+
649
+ buildAndSetDistributionHash (
650
+ hashStoreLibrary ,
651
+ ret ,
652
+ "character_lengths" ,
653
+ stats .characterLengthFrequencies ,
654
+ Optional .empty (),
655
+ Optional .of (count -> count .get ()));
656
+
657
+ buildAndSetDistributionHash (
658
+ hashStoreLibrary ,
659
+ ret ,
660
+ "code_ranges" ,
661
+ stats .codeRangeFrequencies ,
662
+ Optional .of (codeRange -> getLanguage ().getSymbol (codeRange .toString ())),
663
+ Optional .of (count -> count .get ()));
664
+
665
+ buildAndSetDistributionHash (
666
+ hashStoreLibrary ,
667
+ ret ,
668
+ "encodings" ,
669
+ stats .encodingFrequencies ,
670
+ Optional .empty (),
671
+ Optional .of (count -> count .get ()));
672
+
673
+ buildAndSetDistributionHash (
674
+ hashStoreLibrary ,
675
+ ret ,
676
+ "rope_types" ,
677
+ stats .ropeClassFrequencies ,
678
+ Optional .of (
679
+ className -> StringOperations .createUTF8String (
680
+ getContext (),
681
+ getLanguage (),
682
+ StringOperations .encodeRope (className , UTF8Encoding .INSTANCE ))),
683
+ Optional .of (count -> count .get ()));
684
+
685
+ return ret ;
686
+ }
687
+
688
+ private <K , V > void buildAndSetDistributionHash (HashStoreLibrary hashStoreLibrary , RubyHash hash ,
689
+ String keyName , ConcurrentHashMap <K , V > distribution , Optional <Function <K , Object >> keyMapper ,
690
+ Optional <Function <V , Object >> valueMapper ) {
691
+ final RubyHash distributionHash = HashOperations .toRubyHash (
692
+ getContext (),
693
+ getLanguage (),
694
+ hashStoreLibrary ,
695
+ distribution ,
696
+ keyMapper ,
697
+ valueMapper ,
698
+ true );
699
+
700
+ hashStoreLibrary .set (
701
+ hash .store ,
702
+ hash ,
703
+ getLanguage ().getSymbol (keyName ),
704
+ distributionHash ,
705
+ true );
706
+ }
598
707
}
599
708
600
709
@ Primitive (name = "regexp_initialized?" )
@@ -693,7 +802,12 @@ protected Object matchInRegionTRegex(
693
802
}
694
803
695
804
if (getContext ().getOptions ().REGEXP_INSTRUMENT_MATCH ) {
696
- TruffleRegexpNodes .instrumentMatch (MATCHED_REGEXPS_TREGEX , regexp , string , atStart );
805
+ TruffleRegexpNodes .instrumentMatch (
806
+ MATCHED_REGEXPS_TREGEX ,
807
+ regexp ,
808
+ string ,
809
+ atStart ,
810
+ getContext ().getOptions ().REGEXP_INSTRUMENT_MATCH_DETAILED );
697
811
}
698
812
699
813
int fromIndex = fromPos ;
@@ -841,7 +955,12 @@ protected Object executeMatch(
841
955
RubyRegexp regexp , Object string , Matcher matcher , int startPos , int range , boolean onlyMatchAtStart ,
842
956
@ Cached ConditionProfile matchesProfile ) {
843
957
if (getContext ().getOptions ().REGEXP_INSTRUMENT_MATCH ) {
844
- TruffleRegexpNodes .instrumentMatch (MATCHED_REGEXPS_JONI , regexp , string , onlyMatchAtStart );
958
+ TruffleRegexpNodes .instrumentMatch (
959
+ MATCHED_REGEXPS_JONI ,
960
+ regexp ,
961
+ string ,
962
+ onlyMatchAtStart ,
963
+ getContext ().getOptions ().REGEXP_INSTRUMENT_MATCH_DETAILED );
845
964
}
846
965
847
966
int match = runMatch (matcher , startPos , range , onlyMatchAtStart );
@@ -896,13 +1015,11 @@ static final class MatchInfo {
896
1015
897
1016
private final RegexpCacheKey regexpInfo ;
898
1017
private final boolean matchStart ;
899
- private final RubyEncoding matchEncoding ;
900
1018
901
- MatchInfo (RegexpCacheKey regexpInfo , boolean matchStart , RubyEncoding matchEncoding ) {
1019
+ MatchInfo (RegexpCacheKey regexpInfo , boolean matchStart ) {
902
1020
assert regexpInfo != null ;
903
1021
this .regexpInfo = regexpInfo ;
904
1022
this .matchStart = matchStart ;
905
- this .matchEncoding = matchEncoding ;
906
1023
}
907
1024
908
1025
@ Override
@@ -921,24 +1038,54 @@ public boolean equals(Object obj) {
921
1038
}
922
1039
923
1040
MatchInfo other = (MatchInfo ) obj ;
924
- return matchStart == other .matchStart && matchEncoding == other . matchEncoding &&
1041
+ return matchStart == other .matchStart &&
925
1042
regexpInfo .equals (other .regexpInfo );
926
1043
}
927
1044
928
1045
@ Override
929
1046
public String toString () {
930
1047
return String .format (
931
- "Match (%s, fromStart = %s, encoding = %s )" ,
1048
+ "Match (%s, fromStart = %s)" ,
932
1049
regexpInfo ,
933
- matchStart ,
934
- RopeOperations .decodeOrEscapeBinaryRope (matchEncoding .name .rope ));
1050
+ matchStart );
1051
+ }
1052
+ }
1053
+
1054
+ static final class MatchInfoStats {
1055
+
1056
+ private final ConcurrentHashMap <Boolean , AtomicLong > byteArrayPopulatedFrequencies = new ConcurrentHashMap <>();
1057
+ private final ConcurrentHashMap <Integer , AtomicLong > byteLengthFrequencies = new ConcurrentHashMap <>();
1058
+ private final ConcurrentHashMap <Integer , AtomicLong > characterLengthFrequencies = new ConcurrentHashMap <>();
1059
+ private final ConcurrentHashMap <CodeRange , AtomicLong > codeRangeFrequencies = new ConcurrentHashMap <>();
1060
+ private final ConcurrentHashMap <RubyEncoding , AtomicLong > encodingFrequencies = new ConcurrentHashMap <>();
1061
+ private final ConcurrentHashMap <String , AtomicLong > ropeClassFrequencies = new ConcurrentHashMap <>();
1062
+
1063
+ private void record (Rope rope , RubyEncoding encoding ) {
1064
+ ConcurrentOperations
1065
+ .getOrCompute (byteArrayPopulatedFrequencies , rope .getRawBytes () != null , x -> new AtomicLong ())
1066
+ .incrementAndGet ();
1067
+ ConcurrentOperations
1068
+ .getOrCompute (byteLengthFrequencies , rope .byteLength (), x -> new AtomicLong ())
1069
+ .incrementAndGet ();
1070
+ ConcurrentOperations
1071
+ .getOrCompute (characterLengthFrequencies , rope .characterLength (), x -> new AtomicLong ())
1072
+ .incrementAndGet ();
1073
+ ConcurrentOperations
1074
+ .getOrCompute (codeRangeFrequencies , rope .getCodeRange (), x -> new AtomicLong ())
1075
+ .incrementAndGet ();
1076
+ ConcurrentOperations .getOrCompute (encodingFrequencies , encoding , x -> new AtomicLong ()).incrementAndGet ();
1077
+ ConcurrentOperations
1078
+ .getOrCompute (ropeClassFrequencies , rope .getClass ().getSimpleName (), x -> new AtomicLong ())
1079
+ .incrementAndGet ();
935
1080
}
1081
+
936
1082
}
937
1083
938
1084
// Static (class-wide) instrumentation tables.
// MATCHED_REGEXPS_JONI and MATCHED_REGEXPS_TREGEX are the per-MatchInfo counters handed to
// instrumentMatch by the match nodes. MATCHED_REGEXP_STATS holds the detailed statistics that
// instrumentMatch records when detailed collection is requested; it is keyed by MatchInfo alone,
// so both counter maps share its entries.
// NOTE(review): COMPILED_REGEXPS_DYNAMIC / COMPILED_REGEXPS_LITERAL are presumably filled when a
// regexp is compiled -- that code is not visible here; confirm before relying on it.
private static ConcurrentHashMap <RegexpCacheKey , AtomicInteger > COMPILED_REGEXPS_DYNAMIC = new ConcurrentHashMap <>();
939
1085
private static ConcurrentHashMap <RegexpCacheKey , AtomicInteger > COMPILED_REGEXPS_LITERAL = new ConcurrentHashMap <>();
940
1086
private static ConcurrentHashMap <MatchInfo , AtomicInteger > MATCHED_REGEXPS_JONI = new ConcurrentHashMap <>();
941
1087
private static ConcurrentHashMap <MatchInfo , AtomicInteger > MATCHED_REGEXPS_TREGEX = new ConcurrentHashMap <>();
1088
+ private static ConcurrentHashMap <MatchInfo , MatchInfoStats > MATCHED_REGEXP_STATS = new ConcurrentHashMap <>();
942
1089
943
1090
/** WARNING: computeRegexpEncoding() mutates options, so the caller should make sure it's a copy */
944
1091
@ TruffleBoundary
0 commit comments