Skip to content

Commit 784eef3

Browse files
committed
Java: Support SCCs in TypeFlow.
1 parent b5f9fbe commit 784eef3

File tree

1 file changed

+182
-42
lines changed

1 file changed

+182
-42
lines changed

java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll

Lines changed: 182 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ private class TypeFlowNode extends TTypeFlowNode {
5353
}
5454
}
5555

56+
private int getNodeKind(TypeFlowNode n) {
57+
result = 1 and n instanceof TField
58+
or
59+
result = 2 and n instanceof TSsa
60+
or
61+
result = 3 and n instanceof TExpr
62+
or
63+
result = 4 and n instanceof TMethod
64+
}
65+
5666
/** Gets `t` if it is a `RefType` or the boxed type if `t` is a primitive type. */
5767
private RefType boxIfNeeded(Type t) {
5868
t.(PrimitiveType).getBoxedType() = result or
@@ -146,27 +156,165 @@ private predicate joinStep(TypeFlowNode n1, TypeFlowNode n2) {
146156
joinStep0(n1, n2) and not isNull(n1)
147157
}
148158

149-
private predicate joinStepRank1(int r, TypeFlowNode n1, TypeFlowNode n2) {
150-
n1 =
151-
rank[r](TypeFlowNode n |
152-
joinStep(n, n2)
159+
private predicate anyStep(TypeFlowNode n1, TypeFlowNode n2) { joinStep(n1, n2) or step(n1, n2) }
160+
161+
private predicate sccEdge(TypeFlowNode n1, TypeFlowNode n2) { anyStep(n1, n2) and anyStep+(n2, n1) }
162+
163+
/*
164+
* SCC reduction.
165+
*
166+
* This ought to be as easy as `equivalenceRelation(sccEdge/2)(n, scc)`, but
167+
* this HOP is not currently supported for newtypes.
168+
*
169+
* A straightforward implementation would be:
170+
* ```
171+
* predicate sccRepr(TypeFlowNode n, TypeFlowNode scc) {
172+
* scc =
173+
* max(TypeFlowNode n2 |
174+
* sccEdge+(n, n2)
175+
* |
176+
* n2
177+
* order by
178+
* n2.getLocation().getStartLine(), n2.getLocation().getStartColumn(), getNodeKind(n2)
179+
* )
180+
* }
181+
*
182+
* ```
183+
* but this is quadratic in the size of the SCCs.
184+
*
185+
* Instead we find local maxima and determine the SCC representatives from those.
186+
* (This is still worst-case quadratic in the size of the SCCs, but generally
187+
* performs better.)
188+
*/
189+
190+
private predicate sccEdgeWithMax(TypeFlowNode n1, TypeFlowNode n2, TypeFlowNode m) {
191+
sccEdge(n1, n2) and
192+
m =
193+
max(TypeFlowNode n |
194+
n = [n1, n2]
153195
|
154-
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn()
196+
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn(), getNodeKind(n)
155197
)
156198
}
157199

158-
private predicate joinStepRank2(int r2, int r1, TypeFlowNode n) {
159-
r1 = rank[r2](int r | joinStepRank1(r, _, n) | r)
200+
private predicate hasLargerNeighbor(TypeFlowNode n) {
201+
exists(TypeFlowNode n2 |
202+
sccEdgeWithMax(n, n2, n2) and
203+
not sccEdgeWithMax(n, n2, n)
204+
or
205+
sccEdgeWithMax(n2, n, n2) and
206+
not sccEdgeWithMax(n2, n, n)
207+
)
208+
}
209+
210+
private predicate localMax(TypeFlowNode m) {
211+
sccEdgeWithMax(_, _, m) and
212+
not hasLargerNeighbor(m)
160213
}
161214

162-
private predicate joinStepRank(int r, TypeFlowNode n1, TypeFlowNode n2) {
163-
exists(int r1 |
164-
joinStepRank1(r1, n1, n2) and
165-
joinStepRank2(r, r1, n2)
215+
private predicate sccReprFromLocalMax(TypeFlowNode scc) {
216+
exists(TypeFlowNode m |
217+
localMax(m) and
218+
scc =
219+
max(TypeFlowNode n2 |
220+
sccEdge+(m, n2) and localMax(n2)
221+
|
222+
n2
223+
order by
224+
n2.getLocation().getStartLine(), n2.getLocation().getStartColumn(), getNodeKind(n2)
225+
)
166226
)
167227
}
168228

169-
private int lastRank(TypeFlowNode n) { result = max(int r | joinStepRank(r, _, n)) }
229+
private predicate sccRepr(TypeFlowNode n, TypeFlowNode scc) {
230+
sccEdge+(n, scc) and sccReprFromLocalMax(scc)
231+
}
232+
233+
private predicate sccJoinStep(TypeFlowNode n, TypeFlowNode scc) {
234+
exists(TypeFlowNode mid |
235+
joinStep(n, mid) and
236+
sccRepr(mid, scc) and
237+
not sccRepr(n, scc)
238+
)
239+
}
240+
241+
private signature predicate edgeSig(TypeFlowNode n1, TypeFlowNode n2);
242+
243+
private signature module RankedEdge {
244+
predicate edgeRank(int r, TypeFlowNode n1, TypeFlowNode n2);
245+
246+
int lastRank(TypeFlowNode n);
247+
}
248+
249+
private module RankEdge<edgeSig/2 edge> implements RankedEdge {
250+
private predicate edgeRank1(int r, TypeFlowNode n1, TypeFlowNode n2) {
251+
n1 =
252+
rank[r](TypeFlowNode n |
253+
edge(n, n2)
254+
|
255+
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn()
256+
)
257+
}
258+
259+
private predicate edgeRank2(int r2, int r1, TypeFlowNode n) {
260+
r1 = rank[r2](int r | edgeRank1(r, _, n) | r)
261+
}
262+
263+
predicate edgeRank(int r, TypeFlowNode n1, TypeFlowNode n2) {
264+
exists(int r1 |
265+
edgeRank1(r1, n1, n2) and
266+
edgeRank2(r, r1, n2)
267+
)
268+
}
269+
270+
int lastRank(TypeFlowNode n) { result = max(int r | edgeRank(r, _, n)) }
271+
}
272+
273+
private signature module TypePropagation {
274+
predicate candType(TypeFlowNode n, RefType t);
275+
276+
bindingset[t]
277+
predicate supportsType(TypeFlowNode n, RefType t);
278+
}
279+
280+
/** Implements recursion through `forall` by way of edge ranking. */
281+
private module ForAll<RankedEdge Edge, TypePropagation T> {
282+
/**
283+
* Holds if `t` is a bound that holds on one of the incoming edges to `n` and
284+
* thus is a candidate bound for `n`.
285+
*/
286+
pragma[nomagic]
287+
private predicate candJoinType(TypeFlowNode n, RefType t) {
288+
exists(TypeFlowNode mid |
289+
T::candType(mid, t) and
290+
Edge::edgeRank(_, mid, n)
291+
)
292+
}
293+
294+
/**
295+
* Holds if `t` is a candidate bound for `n` that is also valid for data coming
296+
* through the edges into `n` ranked from `1` to `r`.
297+
*/
298+
private predicate flowJoin(int r, TypeFlowNode n, RefType t) {
299+
(
300+
r = 1 and candJoinType(n, t)
301+
or
302+
flowJoin(r - 1, n, t) and Edge::edgeRank(r, _, n)
303+
) and
304+
forall(TypeFlowNode mid | Edge::edgeRank(r, mid, n) | T::supportsType(mid, t))
305+
}
306+
307+
/**
308+
* Holds if `t` is a candidate bound for `n` that is also valid for data
309+
* coming through all the incoming edges, and therefore is a valid bound for
310+
* `n`.
311+
*/
312+
predicate flowJoin(TypeFlowNode n, RefType t) { flowJoin(Edge::lastRank(n), n, t) }
313+
}
314+
315+
module RankedJoinStep = RankEdge<joinStep/2>;
316+
317+
module RankedSccJoinStep = RankEdge<sccJoinStep/2>;
170318

171319
private predicate exactTypeBase(TypeFlowNode n, RefType t) {
172320
exists(ClassInstanceExpr e |
@@ -177,15 +325,10 @@ private predicate exactTypeBase(TypeFlowNode n, RefType t) {
177325
)
178326
}
179327

180-
private predicate exactTypeRank(int r, TypeFlowNode n, RefType t) {
181-
forall(TypeFlowNode mid | joinStepRank(r, mid, n) | exactType(mid, t)) and
182-
joinStepRank(r, _, n)
183-
}
328+
private module ExactTypePropagation implements TypePropagation {
329+
predicate candType = exactType/2;
184330

185-
private predicate exactTypeJoin(int r, TypeFlowNode n, RefType t) {
186-
exactTypeRank(1, n, t) and r = 1
187-
or
188-
exactTypeJoin(r - 1, n, t) and exactTypeRank(r, n, t)
331+
predicate supportsType = exactType/2;
189332
}
190333

191334
/**
@@ -199,7 +342,14 @@ private predicate exactType(TypeFlowNode n, RefType t) {
199342
or
200343
// The following is an optimized version of
201344
// `forex(TypeFlowNode mid | joinStep(mid, n) | exactType(mid, t))`
202-
exactTypeJoin(lastRank(n), n, t)
345+
ForAll<RankedJoinStep, ExactTypePropagation>::flowJoin(n, t)
346+
or
347+
exists(TypeFlowNode scc |
348+
sccRepr(n, scc) and
349+
// Optimized version of
350+
// `forex(TypeFlowNode mid | sccJoinStep(mid, scc) | exactType(mid, t))`
351+
ForAll<RankedSccJoinStep, ExactTypePropagation>::flowJoin(scc, t)
352+
)
203353
}
204354

205355
/**
@@ -343,30 +493,15 @@ private predicate typeFlowBase(TypeFlowNode n, RefType t) {
343493
)
344494
}
345495

346-
/**
347-
* Holds if `t` is a bound that holds on one of the incoming edges to `n` and
348-
* thus is a candidate bound for `n`.
349-
*/
350-
pragma[noinline]
351-
private predicate typeFlowJoinCand(TypeFlowNode n, RefType t) {
352-
exists(TypeFlowNode mid | joinStep(mid, n) | typeFlow(mid, t))
353-
}
496+
private module TypeFlowPropagation implements TypePropagation {
497+
predicate candType = typeFlow/2;
354498

355-
/**
356-
* Holds if `t` is a candidate bound for `n` that is also valid for data coming
357-
* through the edges into `n` ranked from `1` to `r`.
358-
*/
359-
private predicate typeFlowJoin(int r, TypeFlowNode n, RefType t) {
360-
(
361-
r = 1 and typeFlowJoinCand(n, t)
362-
or
363-
typeFlowJoin(r - 1, n, t) and joinStepRank(r, _, n)
364-
) and
365-
forall(TypeFlowNode mid | joinStepRank(r, mid, n) |
499+
bindingset[t]
500+
predicate supportsType(TypeFlowNode mid, RefType t) {
366501
exists(RefType midtyp | exactType(mid, midtyp) or typeFlow(mid, midtyp) |
367502
pragma[only_bind_out](midtyp).getAnAncestor() = t
368503
)
369-
)
504+
}
370505
}
371506

372507
/**
@@ -378,7 +513,12 @@ private predicate typeFlow(TypeFlowNode n, RefType t) {
378513
or
379514
exists(TypeFlowNode mid | typeFlow(mid, t) and step(mid, n))
380515
or
381-
typeFlowJoin(lastRank(n), n, t)
516+
ForAll<RankedJoinStep, TypeFlowPropagation>::flowJoin(n, t)
517+
or
518+
exists(TypeFlowNode scc |
519+
sccRepr(n, scc) and
520+
ForAll<RankedSccJoinStep, TypeFlowPropagation>::flowJoin(scc, t)
521+
)
382522
}
383523

384524
pragma[nomagic]

0 commit comments

Comments
 (0)