Skip to content

Commit 22946b1

Browse files
authored
Merge pull request #10574 from hvitved/ruby/reverse-known-stores
Ruby: Fix spurious flow through reverse stores
2 parents 9780dff + 92a38b3 commit 22946b1

File tree

12 files changed

+562
-244
lines changed

12 files changed

+562
-244
lines changed

docs/ql-libraries/dataflow/dataflow.md

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -419,20 +419,31 @@ class Content extends TContent {
419419
420420
newtype TContentSet =
421421
TSingletonContent(Content c) or
422+
TKnownOrUnknownArrayElementContent(TKnownArrayElementContent c) or
422423
TAnyArrayElementContent()
423424
424425
class ContentSet extends TContentSet {
425426
Content getAStoreContent() {
426427
this = TSingletonContent(result)
427428
or
428429
// for reverse stores
430+
this = TKnownOrUnknownArrayElementContent(result)
431+
or
432+
// for reverse stores
429433
this = TAnyArrayElementContent() and
430434
result = TUnknownArrayElementContent()
431435
}
432436
433437
Content getAReadContent() {
434438
this = TSingletonContent(result)
435439
or
440+
exists(TKnownArrayElementContent c |
441+
this = TKnownOrUnknownArrayElementContent(c) |
442+
result = c
443+
or
444+
result = TUnknownArrayElementContent()
445+
)
446+
or
436447
this = TAnyArrayElementContent() and
437448
(result = TUnknownArrayElementContent() or result = TKnownArrayElementContent(_))
438449
}
@@ -447,12 +458,10 @@ a[0] = tainted
447458
# storeStep(not_tainted, TSingletonContent(TKnownArrayElementContent(1)), [post update] a)
448459
a[1] = not_tainted
449460

450-
# readStep(a, TSingletonContent(TKnownArrayElementContent(0)), a[0])
451-
# readStep(a, TSingletonContent(TUnknownArrayElementContent()), a[0])
461+
# readStep(a, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), a[0])
452462
sink(a[0]) # bad
453463

454-
# readStep(a, TSingletonContent(TKnownArrayElementContent(1)), a[1])
455-
# readStep(a, TSingletonContent(TUnknownArrayElementContent()), a[1])
464+
# readStep(a, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(1)), a[1])
456465
sink(a[1]) # good
457466

458467
# readStep(a, TAnyArrayElementContent(), a[unknown])
@@ -461,26 +470,24 @@ sink(a[unknown]) # bad; unknown may be 0
461470
# storeStep(tainted, TSingletonContent(TUnknownArrayElementContent()), [post update] b)
462471
b[unknown] = tainted
463472

464-
# readStep(b, TSingletonContent(TKnownArrayElementContent(0)), b[0])
465-
# readStep(b, TSingletonContent(TUnknownArrayElementContent()), b[0])
473+
# readStep(b, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), b[0])
466474
sink(b[0]) # bad; unknown may be 0
467475

468-
# storeStep(tainted, TSingletonContent(TKnownArrayElementContent(0)), [post update] c[unknown])
469-
# storeStep(not_tainted, TSingletonContent(TKnownArrayElementContent(1)), [post update] c[unknown])
470-
# readStep(c, TAnyArrayElementContent(), c[unknown])
471-
# storeStep([post update] c[unknown], TAnyArrayElementContent(), [post update] c) # auto-generated reverse store (see Example 2)
472-
c[unknown][0] = tainted
473-
c[unknown][1] = not_tainted
474-
475-
476-
# readStep(c[0], TSingletonContent(TKnownArrayElementContent(0)), c[0][0])
477-
# readStep(c[0], TSingletonContent(TUnknownArrayElementContent()), c[0][0])
478-
# readStep(c[0], TSingletonContent(TKnownArrayElementContent(1)), c[0][1])
479-
# readStep(c[0], TSingletonContent(TUnknownArrayElementContent()), c[0][1])
480-
# readStep(c, TSingletonContent(TKnownArrayElementContent(0)), c[0])
481-
# readStep(c, TSingletonContent(TUnknownArrayElementContent()), c[0])
476+
# storeStep(tainted, TSingletonContent(TUnknownArrayElementContent()), [post update] c[0])
477+
# storeStep(not_tainted, TSingletonContent(TUnknownArrayElementContent()), [post update] c[1])
478+
# readStep(c, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), c[0])
479+
# readStep(c, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(1)), c[1])
480+
# storeStep([post update] c[0], TSingletonContent(TKnownArrayElementContent(0)), [post update] c) # auto-generated reverse store (see Example 2)
481+
# storeStep([post update] c[1], TSingletonContent(TKnownArrayElementContent(1)), [post update] c) # auto-generated reverse store (see Example 2)
482+
c[0][unknown] = tainted
483+
c[1][unknown] = not_tainted
484+
485+
# readStep(c[0], TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), c[0][0])
486+
# readStep(c[1], TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), c[1][0])
487+
# readStep(c, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), c[0])
488+
# readStep(c, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(1)), c[1])
482489
sink(c[0][0]) # bad; unknown may be 0
483-
sink(c[0][1]) # good
490+
sink(c[1][0]) # good
484491
```
485492

486493
### Field flow barriers

ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,17 @@ module SummaryComponent {
4545
result = SC::content(TSingletonContent(DataFlow::Content::getElementContent(cv)))
4646
}
4747

48+
/**
49+
* Gets a summary component that represents an element in a collection at a specific
50+
* known index `cv`, or an uknown index.
51+
*/
52+
SummaryComponent elementKnownOrUnknown(ConstantValue cv) {
53+
result = SC::content(TKnownOrUnknownElementContent(TKnownElementContent(cv)))
54+
or
55+
not exists(TKnownElementContent(cv)) and
56+
result = elementUnknown()
57+
}
58+
4859
/**
4960
* Gets a summary component that represents an element in a collection at either an unknown
5061
* index or known index. This has the same semantics as
@@ -62,7 +73,15 @@ module SummaryComponent {
6273
* integer index `lower` or above.
6374
*/
6475
SummaryComponent elementLowerBound(int lower) {
65-
result = SC::content(TElementLowerBoundContent(lower))
76+
result = SC::content(TElementLowerBoundContent(lower, false))
77+
}
78+
79+
/**
80+
* Gets a summary component that represents an element in a collection at known
81+
* integer index `lower` or above, or possibly at an unknown index.
82+
*/
83+
SummaryComponent elementLowerBoundOrUnknown(int lower) {
84+
result = SC::content(TElementLowerBoundContent(lower, true))
6685
}
6786

6887
/** Gets a summary component that represents a value in a pair at an unknown key. */

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,8 +380,10 @@ private module Cached {
380380
newtype TContentSet =
381381
TSingletonContent(Content c) or
382382
TAnyElementContent() or
383-
TElementLowerBoundContent(int lower) {
384-
FlowSummaryImplSpecific::ParsePositions::isParsedElementLowerBoundPosition(_, lower)
383+
TKnownOrUnknownElementContent(Content::KnownElementContent c) or
384+
TElementLowerBoundContent(int lower, boolean includeUnknown) {
385+
FlowSummaryImplSpecific::ParsePositions::isParsedElementLowerBoundPosition(_, includeUnknown,
386+
lower)
385387
}
386388

387389
cached

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,28 @@ class ContentSet extends TContentSet {
329329
/** Holds if this content set represents all `ElementContent`s. */
330330
predicate isAnyElement() { this = TAnyElementContent() }
331331

332+
/**
333+
* Holds if this content set represents a specific known element index, or an
334+
* unknown element index.
335+
*/
336+
predicate isKnownOrUnknownElement(Content::KnownElementContent c) {
337+
this = TKnownOrUnknownElementContent(c)
338+
}
339+
332340
/**
333341
* Holds if this content set represents all `KnownElementContent`s where
334342
* the index is an integer greater than or equal to `lower`.
335343
*/
336-
predicate isElementLowerBound(int lower) { this = TElementLowerBoundContent(lower) }
344+
predicate isElementLowerBound(int lower) { this = TElementLowerBoundContent(lower, false) }
345+
346+
/**
347+
* Holds if this content set represents `UnknownElementContent` unioned with
348+
* all `KnownElementContent`s where the index is an integer greater than or
349+
* equal to `lower`.
350+
*/
351+
predicate isElementLowerBoundOrUnknown(int lower) {
352+
this = TElementLowerBoundContent(lower, true)
353+
}
337354

338355
/** Gets a textual representation of this content set. */
339356
string toString() {
@@ -345,8 +362,18 @@ class ContentSet extends TContentSet {
345362
this.isAnyElement() and
346363
result = "any element"
347364
or
348-
exists(int lower |
349-
this.isElementLowerBound(lower) and
365+
exists(Content::KnownElementContent c |
366+
this.isKnownOrUnknownElement(c) and
367+
result = c + " or unknown"
368+
)
369+
or
370+
exists(int lower, boolean includeUnknown |
371+
this = TElementLowerBoundContent(lower, includeUnknown)
372+
|
373+
includeUnknown = false and
374+
result = lower + "..!"
375+
or
376+
includeUnknown = true and
350377
result = lower + ".."
351378
)
352379
}
@@ -355,9 +382,18 @@ class ContentSet extends TContentSet {
355382
Content getAStoreContent() {
356383
this.isSingleton(result)
357384
or
385+
// For reverse stores, `a[unknown][0] = x`, it is important that the read-step
386+
// from `a` to `a[unknown]` (which can read any element), gets translated into
387+
// a reverse store step that store only into `?`
358388
this.isAnyElement() and
359389
result = TUnknownElementContent()
360390
or
391+
// For reverse stores, `a[1][0] = x`, it is important that the read-step
392+
// from `a` to `a[1]` (which can read both elements stored at exactly index `1`
393+
// and elements stored at unknown index), gets translated into a reverse store
394+
// step that store only into `1`
395+
this.isKnownOrUnknownElement(result)
396+
or
361397
this.isElementLowerBound(_) and
362398
result = TUnknownElementContent()
363399
}
@@ -369,10 +405,21 @@ class ContentSet extends TContentSet {
369405
this.isAnyElement() and
370406
result instanceof Content::ElementContent
371407
or
372-
exists(int lower, int i |
373-
this.isElementLowerBound(lower) and
374-
result.(Content::KnownElementContent).getIndex().isInt(i) and
375-
i >= lower
408+
exists(Content::KnownElementContent c | this.isKnownOrUnknownElement(c) |
409+
result = c or
410+
result = TUnknownElementContent()
411+
)
412+
or
413+
exists(int lower, boolean includeUnknown |
414+
this = TElementLowerBoundContent(lower, includeUnknown)
415+
|
416+
exists(int i |
417+
result.(Content::KnownElementContent).getIndex().isInt(i) and
418+
i >= lower
419+
)
420+
or
421+
includeUnknown = true and
422+
result = TUnknownElementContent()
376423
)
377424
}
378425
}

ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,30 @@ private SummaryComponent interpretElementArg(string arg) {
7474
arg = "any" and
7575
result = FlowSummary::SummaryComponent::elementAny()
7676
or
77-
exists(int lower |
78-
ParsePositions::isParsedElementLowerBoundPosition(arg, lower) and
77+
exists(int lower, boolean includeUnknown |
78+
ParsePositions::isParsedElementLowerBoundPosition(arg, includeUnknown, lower)
79+
|
80+
includeUnknown = false and
7981
result = FlowSummary::SummaryComponent::elementLowerBound(lower)
82+
or
83+
includeUnknown = true and
84+
result = FlowSummary::SummaryComponent::elementLowerBoundOrUnknown(lower)
8085
)
8186
or
82-
exists(ConstantValue cv | result = FlowSummary::SummaryComponent::elementKnown(cv) |
83-
cv.isInt(AccessPath::parseInt(arg))
87+
exists(ConstantValue cv, string argAdjusted, boolean includeUnknown |
88+
argAdjusted = ParsePositions::adjustElementArgument(arg, includeUnknown) and
89+
(
90+
includeUnknown = false and
91+
result = FlowSummary::SummaryComponent::elementKnown(cv)
92+
or
93+
includeUnknown = true and
94+
result = FlowSummary::SummaryComponent::elementKnownOrUnknown(cv)
95+
)
96+
|
97+
cv.isInt(AccessPath::parseInt(argAdjusted))
8498
or
85-
not exists(AccessPath::parseInt(arg)) and
86-
cv.serialize() = arg
99+
not exists(AccessPath::parseInt(argAdjusted)) and
100+
cv.serialize() = argAdjusted
87101
)
88102
}
89103

@@ -277,9 +291,19 @@ module ParsePositions {
277291
c = paramName + ":"
278292
}
279293

280-
predicate isParsedElementLowerBoundPosition(string c, int lower) {
294+
bindingset[arg]
295+
string adjustElementArgument(string arg, boolean includeUnknown) {
296+
result = arg.regexpCapture("(.*)!", 1) and
297+
includeUnknown = false
298+
or
299+
result = arg and
300+
not arg.matches("%!") and
301+
includeUnknown = true
302+
}
303+
304+
predicate isParsedElementLowerBoundPosition(string c, boolean includeUnknown, int lower) {
281305
isElementBody(c) and
282-
lower = AccessPath::parseLowerBound(c)
306+
lower = AccessPath::parseLowerBound(adjustElementArgument(c, includeUnknown))
283307
}
284308
}
285309

0 commit comments

Comments
 (0)