Skip to content

Commit d713910

Browse files
authored
Merge pull request #10334 from aschackmull/java/uniontypeflow
Java: Implement union type flow and replace ad-hoc variable tracking in dispatch
2 parents 2ece977 + 64e2f41 commit d713910

File tree

13 files changed

+459
-115
lines changed

13 files changed

+459
-115
lines changed

java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll

Lines changed: 232 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -287,10 +287,12 @@ private module RankEdge<edgeSig/2 edge> implements RankedEdge {
287287
}
288288

289289
private signature module TypePropagation {
290-
predicate candType(TypeFlowNode n, RefType t);
290+
class Typ;
291+
292+
predicate candType(TypeFlowNode n, Typ t);
291293

292294
bindingset[t]
293-
predicate supportsType(TypeFlowNode n, RefType t);
295+
predicate supportsType(TypeFlowNode n, Typ t);
294296
}
295297

296298
/** Implements recursion through `forall` by way of edge ranking. */
@@ -300,7 +302,7 @@ private module ForAll<RankedEdge Edge, TypePropagation T> {
300302
* thus is a candidate bound for `n`.
301303
*/
302304
pragma[nomagic]
303-
private predicate candJoinType(TypeFlowNode n, RefType t) {
305+
private predicate candJoinType(TypeFlowNode n, T::Typ t) {
304306
exists(TypeFlowNode mid |
305307
T::candType(mid, t) and
306308
Edge::edgeRank(_, mid, n)
@@ -311,7 +313,7 @@ private module ForAll<RankedEdge Edge, TypePropagation T> {
311313
* Holds if `t` is a candidate bound for `n` that is also valid for data coming
312314
* through the edges into `n` ranked from `1` to `r`.
313315
*/
314-
private predicate flowJoin(int r, TypeFlowNode n, RefType t) {
316+
private predicate flowJoin(int r, TypeFlowNode n, T::Typ t) {
315317
(
316318
r = 1 and candJoinType(n, t)
317319
or
@@ -325,7 +327,7 @@ private module ForAll<RankedEdge Edge, TypePropagation T> {
325327
* coming through all the incoming edges, and therefore is a valid bound for
326328
* `n`.
327329
*/
328-
predicate flowJoin(TypeFlowNode n, RefType t) { flowJoin(Edge::lastRank(n), n, t) }
330+
predicate flowJoin(TypeFlowNode n, T::Typ t) { flowJoin(Edge::lastRank(n), n, t) }
329331
}
330332

331333
module RankedJoinStep = RankEdge<joinStep/2>;
@@ -337,11 +339,13 @@ private predicate exactTypeBase(TypeFlowNode n, RefType t) {
337339
n.asExpr() = e and
338340
e.getType() = t and
339341
not e instanceof FunctionalExpr and
340-
exists(RefType sub | sub.getASourceSupertype() = t.getSourceDeclaration())
342+
exists(SrcRefType sub | sub.getASourceSupertype() = t.getSourceDeclaration())
341343
)
342344
}
343345

344346
private module ExactTypePropagation implements TypePropagation {
347+
class Typ = RefType;
348+
345349
predicate candType = exactType/2;
346350

347351
predicate supportsType = exactType/2;
@@ -384,17 +388,23 @@ private predicate upcastCand(TypeFlowNode n, RefType t1, RefType t1e, RefType t2
384388
)
385389
}
386390

391+
private predicate unconstrained(BoundedType t) {
392+
t.(Wildcard).isUnconstrained()
393+
or
394+
t.getUpperBoundType() instanceof TypeObject and
395+
not t.(Wildcard).hasLowerBound()
396+
or
397+
unconstrained(t.getUpperBoundType())
398+
or
399+
unconstrained(t.(Wildcard).getLowerBoundType())
400+
}
401+
387402
/** Holds if `t` is a raw type or parameterised type with unrestricted type arguments. */
388403
private predicate unbound(RefType t) {
389404
t instanceof RawType
390405
or
391406
exists(ParameterizedType pt | pt = t |
392-
forex(RefType arg | arg = pt.getATypeArgument() |
393-
arg.(Wildcard).isUnconstrained()
394-
or
395-
arg.(BoundedType).getUpperBoundType() instanceof TypeObject and
396-
not arg.(Wildcard).hasLowerBound()
397-
)
407+
forex(RefType arg | arg = pt.getATypeArgument() | unconstrained(arg))
398408
)
399409
}
400410

@@ -492,9 +502,10 @@ predicate arrayInstanceOfGuarded(ArrayAccess aa, RefType t) {
492502

493503
/**
494504
* Holds if `n` has type `t` and this information is discarded, such that `t`
495-
* might be a better type bound for nodes where `n` flows to.
505+
* might be a better type bound for nodes where `n` flows to. This might include
506+
* multiple bounds for a single node.
496507
*/
497-
private predicate typeFlowBase(TypeFlowNode n, RefType t) {
508+
private predicate typeFlowBaseCand(TypeFlowNode n, RefType t) {
498509
exists(RefType srctype |
499510
upcast(n, srctype) or
500511
upcastEnhancedForStmt(n.asSsa(), srctype) or
@@ -509,7 +520,29 @@ private predicate typeFlowBase(TypeFlowNode n, RefType t) {
509520
)
510521
}
511522

523+
/**
524+
* Holds if `n` has type `t` and this information is discarded, such that `t`
525+
* might be a better type bound for nodes where `n` flows to. This only includes
526+
* the best such bound for each node.
527+
*/
528+
private predicate typeFlowBase(TypeFlowNode n, RefType t) {
529+
exists(RefType te |
530+
typeFlowBaseCand(n, t) and
531+
te = t.getErasure() and
532+
not exists(RefType better |
533+
typeFlowBaseCand(n, better) and
534+
better != t and
535+
not t.getASupertype+() = better
536+
|
537+
better.getASupertype+() = t or
538+
better.getErasure().(RefType).getASourceSupertype+() = te
539+
)
540+
)
541+
}
542+
512543
private module TypeFlowPropagation implements TypePropagation {
544+
class Typ = RefType;
545+
513546
predicate candType = typeFlow/2;
514547

515548
bindingset[t]
@@ -589,6 +622,175 @@ private predicate bestTypeFlow(TypeFlowNode n, RefType t) {
589622
not irrelevantBound(n, t)
590623
}
591624

625+
private predicate bestTypeFlow(TypeFlowNode n, RefType t, boolean exact) {
626+
exactType(n, t) and exact = true
627+
or
628+
not exactType(n, _) and bestTypeFlow(n, t) and exact = false
629+
}
630+
631+
private predicate bestTypeFlowOrTypeFlowBase(TypeFlowNode n, RefType t, boolean exact) {
632+
bestTypeFlow(n, t, exact)
633+
or
634+
typeFlowBase(n, t) and
635+
exact = false and
636+
not bestTypeFlow(n, _, _)
637+
}
638+
639+
/**
640+
* Holds if `n` has type `t` and this information is not propagated as a
641+
* universal bound to a subsequent node, such that `t` might form the basis for
642+
* a union type bound for that node.
643+
*/
644+
private predicate unionTypeFlowBaseCand(TypeFlowNode n, RefType t, boolean exact) {
645+
exists(TypeFlowNode next |
646+
joinStep(n, next) and
647+
bestTypeFlowOrTypeFlowBase(n, t, exact) and
648+
not bestTypeFlowOrTypeFlowBase(next, t, exact) and
649+
not exactType(next, _)
650+
)
651+
}
652+
653+
/**
654+
* Holds if `ioe` checks `v`, its true-successor is `bb`, and `bb` has multiple
655+
* predecessors.
656+
*/
657+
private predicate instanceofDisjunct(InstanceOfExpr ioe, BasicBlock bb, BaseSsaVariable v) {
658+
ioe.getExpr() = v.getAUse() and
659+
strictcount(bb.getABBPredecessor()) > 1 and
660+
exists(ConditionBlock cb | cb.getCondition() = ioe and cb.getTestSuccessor(true) = bb)
661+
}
662+
663+
/** Holds if `bb` is disjunctively guarded by multiple `instanceof` tests on `v`. */
664+
private predicate instanceofDisjunction(BasicBlock bb, BaseSsaVariable v) {
665+
strictcount(InstanceOfExpr ioe | instanceofDisjunct(ioe, bb, v)) =
666+
strictcount(bb.getABBPredecessor())
667+
}
668+
669+
/**
670+
* Holds if `n` is a value that is guarded by a disjunction of
671+
* `instanceof t_i` where `t` is one of those `t_i`.
672+
*/
673+
private predicate instanceofDisjunctionGuarded(TypeFlowNode n, RefType t) {
674+
exists(BasicBlock bb, InstanceOfExpr ioe, BaseSsaVariable v, VarAccess va |
675+
instanceofDisjunction(bb, v) and
676+
bb.bbDominates(va.getBasicBlock()) and
677+
va = v.getAUse() and
678+
instanceofDisjunct(ioe, bb, v) and
679+
t = ioe.getCheckedType() and
680+
n.asExpr() = va
681+
)
682+
}
683+
684+
private module HasUnionTypePropagation implements TypePropagation {
685+
class Typ = Unit;
686+
687+
predicate candType(TypeFlowNode mid, Unit unit) {
688+
exists(unit) and
689+
(unionTypeFlowBaseCand(mid, _, _) or hasUnionTypeFlow(mid))
690+
}
691+
692+
predicate supportsType = candType/2;
693+
}
694+
695+
/**
696+
* Holds if all incoming type flow can be traced back to a
697+
* `unionTypeFlowBaseCand`, such that we can compute a union type bound for `n`.
698+
* Disregards nodes for which we have an exact bound.
699+
*/
700+
private predicate hasUnionTypeFlow(TypeFlowNode n) {
701+
not exactType(n, _) and
702+
(
703+
// Optimized version of
704+
// `forex(TypeFlowNode mid | joinStep(mid, n) | unionTypeFlowBaseCand(mid, _, _) or hasUnionTypeFlow(mid))`
705+
ForAll<RankedJoinStep, HasUnionTypePropagation>::flowJoin(n, _)
706+
or
707+
exists(TypeFlowNode scc |
708+
sccRepr(n, scc) and
709+
// Optimized version of
710+
// `forex(TypeFlowNode mid | sccJoinStep(mid, scc) | unionTypeFlowBaseCand(mid, _, _) or hasUnionTypeFlow(mid))`
711+
ForAll<RankedSccJoinStep, HasUnionTypePropagation>::flowJoin(scc, _)
712+
)
713+
or
714+
exists(TypeFlowNode mid | step(mid, n) and hasUnionTypeFlow(mid))
715+
or
716+
instanceofDisjunctionGuarded(n, _)
717+
)
718+
}
719+
720+
pragma[nomagic]
721+
private RefType getTypeBound(TypeFlowNode n) {
722+
bestTypeFlow(n, result)
723+
or
724+
not bestTypeFlow(n, _) and result = n.getType()
725+
}
726+
727+
pragma[nomagic]
728+
private predicate unionTypeFlow0(TypeFlowNode n, RefType t, boolean exact) {
729+
hasUnionTypeFlow(n) and
730+
(
731+
exists(TypeFlowNode mid | anyStep(mid, n) |
732+
unionTypeFlowBaseCand(mid, t, exact) or unionTypeFlow(mid, t, exact)
733+
)
734+
or
735+
instanceofDisjunctionGuarded(n, t) and exact = false
736+
)
737+
}
738+
739+
/** Holds if we have a union type bound for `n` and `t` is one of its parts. */
740+
private predicate unionTypeFlow(TypeFlowNode n, RefType t, boolean exact) {
741+
unionTypeFlow0(n, t, exact) and
742+
// filter impossible union parts:
743+
if exact = true
744+
then t.getErasure().(RefType).getASourceSupertype*() = getTypeBound(n).getErasure()
745+
else haveIntersection(getTypeBound(n), t)
746+
}
747+
748+
/**
749+
* Holds if the inferred union type bound for `n` contains the best universal
750+
* bound and thus is irrelevant.
751+
*/
752+
private predicate irrelevantUnionType(TypeFlowNode n) {
753+
exists(RefType t, RefType nt, RefType te, RefType nte |
754+
unionTypeFlow(n, t, false) and
755+
nt = getTypeBound(n) and
756+
te = t.getErasure() and
757+
nte = nt.getErasure()
758+
|
759+
nt.getASupertype*() = t
760+
or
761+
nte.getASourceSupertype+() = te
762+
or
763+
nte = te and unbound(t)
764+
)
765+
}
766+
767+
/**
768+
* Holds if `t` is an irrelevant part of the union type bound for `n` due to
769+
* being contained in another part of the union type bound.
770+
*/
771+
private predicate irrelevantUnionTypePart(TypeFlowNode n, RefType t, boolean exact) {
772+
unionTypeFlow(n, t, exact) and
773+
not irrelevantUnionType(n) and
774+
exists(RefType weaker |
775+
unionTypeFlow(n, weaker, false) and
776+
t.getASupertype*() = weaker
777+
|
778+
exact = true or not weaker.getASupertype*() = t
779+
)
780+
}
781+
782+
/**
783+
* Holds if the runtime type of `n` is bounded by a union type and if this
784+
* bound is likely to be better than the static type of `n`. The union type is
785+
* made up of the types `t` related to `n` by this predicate, and the flag
786+
* `exact` indicates whether `t` is an exact bound or merely an upper bound.
787+
*/
788+
private predicate bestUnionType(TypeFlowNode n, RefType t, boolean exact) {
789+
unionTypeFlow(n, t, exact) and
790+
not irrelevantUnionType(n) and
791+
not irrelevantUnionTypePart(n, t, exact)
792+
}
793+
592794
cached
593795
private module TypeFlowBounds {
594796
/**
@@ -600,11 +802,7 @@ private module TypeFlowBounds {
600802
predicate fieldTypeFlow(Field f, RefType t, boolean exact) {
601803
exists(TypeFlowNode n |
602804
n.asField() = f and
603-
(
604-
exactType(n, t) and exact = true
605-
or
606-
not exactType(n, _) and bestTypeFlow(n, t) and exact = false
607-
)
805+
bestTypeFlow(n, t, exact)
608806
)
609807
}
610808

@@ -617,11 +815,21 @@ private module TypeFlowBounds {
617815
predicate exprTypeFlow(Expr e, RefType t, boolean exact) {
618816
exists(TypeFlowNode n |
619817
n.asExpr() = e and
620-
(
621-
exactType(n, t) and exact = true
622-
or
623-
not exactType(n, _) and bestTypeFlow(n, t) and exact = false
624-
)
818+
bestTypeFlow(n, t, exact)
819+
)
820+
}
821+
822+
/**
823+
* Holds if the runtime type of `e` is bounded by a union type and if this
824+
* bound is likely to be better than the static type of `e`. The union type is
825+
* made up of the types `t` related to `e` by this predicate, and the flag
826+
* `exact` indicates whether `t` is an exact bound or merely an upper bound.
827+
*/
828+
cached
829+
predicate exprUnionTypeFlow(Expr e, RefType t, boolean exact) {
830+
exists(TypeFlowNode n |
831+
n.asExpr() = e and
832+
bestUnionType(n, t, exact)
625833
)
626834
}
627835
}

java/ql/lib/semmle/code/java/dataflow/internal/DataFlowDispatch.qll

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ private import DataFlowUtil
44
private import semmle.code.java.dataflow.InstanceAccess
55
private import semmle.code.java.dataflow.FlowSummary
66
private import semmle.code.java.dispatch.VirtualDispatch as VirtualDispatch
7+
private import semmle.code.java.dataflow.TypeFlow
78
private import semmle.code.java.dispatch.internal.Unification
89

910
private module DispatchImpl {
@@ -63,15 +64,21 @@ private module DispatchImpl {
6364
private predicate contextArgHasType(Call ctx, int i, RefType t, boolean exact) {
6465
relevantContext(ctx, i) and
6566
exists(RefType srctype |
66-
exists(Expr arg, Expr src |
67+
exists(Expr arg |
6768
i = -1 and
6869
ctx.getQualifier() = arg
6970
or
7071
ctx.getArgument(i) = arg
7172
|
72-
src = VirtualDispatch::variableTrack(arg) and
73-
srctype = getPreciseType(src) and
74-
if src instanceof ClassInstanceExpr then exact = true else exact = false
73+
exprTypeFlow(arg, srctype, exact)
74+
or
75+
not exprTypeFlow(arg, _, _) and
76+
exprUnionTypeFlow(arg, srctype, exact)
77+
or
78+
not exprTypeFlow(arg, _, _) and
79+
not exprUnionTypeFlow(arg, _, _) and
80+
srctype = getPreciseType(arg) and
81+
if arg instanceof ClassInstanceExpr then exact = true else exact = false
7582
)
7683
or
7784
exists(Node arg |

0 commit comments

Comments
 (0)