Skip to content

Commit 38b8640

Browse files
authored
Python: Fix bad join in RegExpBackRef::getGroup
Although this wasn't (as far as I know) causing any performance issues, it was making the join-order badness report quite noisy, and so I figured it was worth fixing. Before: ``` Tuple counts for RegexTreeView::RegExpBackRef::getGroup#dispred#f0820431#ff/2@d3441d0b after 84ms: 1501195 ~3% {2} r1 = JOIN RegexTreeView::RegExpTerm::getLiteral#dispred#f0820431#ff_10#join_rhs WITH RegexTreeView::RegExpTerm::getLiteral#dispred#f0820431#ff_10#join_rhs ON FIRST 1 OUTPUT Rhs.1 'result', Lhs.1 'result' 149 ~0% {5} r2 = JOIN r1 WITH RegexTreeView::RegExpBackRef#class#31aac2a7#ffff ON FIRST 1 OUTPUT Rhs.1, Rhs.2, Rhs.3, Lhs.1 'result', Lhs.0 'this' 149 ~1% {3} r3 = JOIN r2 WITH regex::RegexString::numbered_backreference#dispred#f0820431#ffff ON FIRST 3 OUTPUT Lhs.3 'result', Rhs.3, Lhs.4 'this' 4 ~0% {2} r4 = JOIN r3 WITH RegexTreeView::RegExpGroup::getNumber#dispred#f0820431#ff ON FIRST 2 OUTPUT Lhs.2 'this', Lhs.0 'result' 1501195 ~3% {2} r5 = JOIN RegexTreeView::RegExpTerm::getLiteral#dispred#f0820431#ff_10#join_rhs WITH RegexTreeView::RegExpTerm::getLiteral#dispred#f0820431#ff_10#join_rhs ON FIRST 1 OUTPUT Lhs.1 'result', Rhs.1 'result' 42526 ~0% {5} r6 = JOIN r5 WITH RegexTreeView::RegExpGroup#31aac2a7#ffff ON FIRST 1 OUTPUT Lhs.1 'this', Lhs.0 'result', Rhs.1, Rhs.2, Rhs.3 22 ~0% {8} r7 = JOIN r6 WITH RegexTreeView::RegExpBackRef#class#31aac2a7#ffff ON FIRST 1 OUTPUT Lhs.2, Lhs.3, Lhs.4, Lhs.1 'result', Lhs.0 'this', Rhs.1, Rhs.2, Rhs.3 0 ~0% {6} r8 = JOIN r7 WITH regex::RegexString::getGroupName#dispred#f0820431#ffff ON FIRST 3 OUTPUT Lhs.5, Lhs.6, Lhs.7, Rhs.3, Lhs.3 'result', Lhs.4 'this' 0 ~0% {2} r9 = JOIN r8 WITH regex::RegexString::named_backreference#dispred#f0820431#ffff ON FIRST 4 OUTPUT Lhs.5 'this', Lhs.4 'result' 4 ~0% {2} r10 = r4 UNION r9 return r10 ``` In this case I opted for a classical solution: tying together the literal and number (or name) part of the backreference in order to encourage a two-column join. After: ``` Tuple counts for RegexTreeView::RegExpBackRef::getGroup#dispred#f0820431#ff/2@b0cc4d5n after 0ms: 898 ~1% {3} r1 = JOIN RegexTreeView::RegExpTerm::getLiteral#dispred#f0820431#ff WITH RegexTreeView::RegExpGroup::getNumber#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.1, Rhs.1, Lhs.0 'result' 4 ~0% {2} r2 = JOIN r1 WITH RegexTreeView::RegExpBackRef::hasLiteralAndNumber#f0820431#fff_120#join_rhs ON FIRST 2 OUTPUT Rhs.2 'this', Lhs.2 'result' 1110 ~0% {5} r3 = JOIN RegexTreeView::RegExpGroup#31aac2a7#ffff WITH RegexTreeView::RegExpTerm::getLiteral#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.1, Lhs.2, Lhs.3, Lhs.0 'result', Rhs.1 146 ~0% {3} r4 = JOIN r3 WITH regex::RegexString::getGroupName#dispred#f0820431#ffff ON FIRST 3 OUTPUT Lhs.4, Rhs.3, Lhs.3 'result' 0 ~0% {2} r5 = JOIN r4 WITH RegexTreeView::RegExpBackRef::hasLiteralAndName#f0820431#fff_120#join_rhs ON FIRST 2 OUTPUT Rhs.2 'this', Lhs.2 'result' 4 ~0% {2} r6 = r2 UNION r5 return r6 ```
1 parent 3543864 commit 38b8640

File tree

1 file changed

+16
-5
lines changed

1 file changed

+16
-5
lines changed

python/ql/lib/semmle/python/RegexTreeView.qll

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,11 +1000,22 @@ class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
10001000

10011001
/** Gets the capture group this back reference refers to. */
10021002
RegExpGroup getGroup() {
1003-
result.getLiteral() = this.getLiteral() and
1004-
(
1005-
result.getNumber() = this.getNumber() or
1006-
result.getName() = this.getName()
1007-
)
1003+
this.hasLiteralAndNumber(result.getLiteral(), result.getNumber()) or
1004+
this.hasLiteralAndName(result.getLiteral(), result.getName())
1005+
}
1006+
1007+
/** Join-order helper for `getGroup`. */
1008+
pragma[nomagic]
1009+
private predicate hasLiteralAndNumber(RegExpLiteral literal, int number) {
1010+
literal = this.getLiteral() and
1011+
number = this.getNumber()
1012+
}
1013+
1014+
/** Join-order helper for `getGroup`. */
1015+
pragma[nomagic]
1016+
private predicate hasLiteralAndName(RegExpLiteral literal, string name) {
1017+
literal = this.getLiteral() and
1018+
name = this.getName()
10081019
}
10091020

10101021
override RegExpTerm getChild(int i) { none() }

0 commit comments

Comments
 (0)