Skip to content

Commit 975edac

Browse files
authored
Merge pull request #9969 from hvitved/ruby/kwargs-missing-flow
Ruby: Support more flow through keyword arguments
2 parents b90a404 + 9268437 commit 975edac

File tree

7 files changed

+328
-202
lines changed

7 files changed

+328
-202
lines changed

ruby/ql/consistency-queries/DataFlowConsistency.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
1010
or
1111
n instanceof SummaryNode
1212
or
13-
n instanceof HashSplatArgumentsNode
13+
n instanceof SynthHashSplatArgumentNode
1414
}
1515
}

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,12 @@ class DataFlowCallable extends TDataFlowCallable {
6565
string toString() { result = [this.asCallable().toString(), this.asLibraryCallable()] }
6666

6767
/** Gets the location of this callable. */
68-
Location getLocation() { result = this.asCallable().getLocation() }
68+
Location getLocation() {
69+
result = this.asCallable().getLocation()
70+
or
71+
this instanceof TLibraryCallable and
72+
result instanceof EmptyLocation
73+
}
6974
}
7075

7176
/**

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 123 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,9 @@ private module Cached {
227227
} or
228228
TSelfParameterNode(MethodBase m) or
229229
TBlockParameterNode(MethodBase m) or
230+
TSynthHashSplatParameterNode(DataFlowCallable c) {
231+
isParameterNode(_, c, any(ParameterPosition p | p.isKeyword(_)))
232+
} or
230233
TExprPostUpdateNode(CfgNodes::ExprCfgNode n) {
231234
n instanceof Argument or
232235
n = any(CfgNodes::ExprNodes::InstanceVariableAccessCfgNode v).getReceiver()
@@ -240,12 +243,13 @@ private module Cached {
240243
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
241244
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
242245
} or
243-
THashSplatArgumentsNode(CfgNodes::ExprNodes::CallCfgNode c) {
246+
TSynthHashSplatArgumentNode(CfgNodes::ExprNodes::CallCfgNode c) {
244247
exists(Argument arg | arg.isArgumentOf(c, any(ArgumentPosition pos | pos.isKeyword(_))))
245248
}
246249

247250
class TParameterNode =
248-
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or TSummaryParameterNode;
251+
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or
252+
TSynthHashSplatParameterNode or TSummaryParameterNode;
249253

250254
private predicate defaultValueFlow(NamedParameter p, ExprNode e) {
251255
p.(OptionalParameter).getDefaultValue() = e.getExprNode().getExpr()
@@ -328,18 +332,21 @@ private module Cached {
328332

329333
cached
330334
predicate isLocalSourceNode(Node n) {
331-
n instanceof ParameterNode
332-
or
333-
n instanceof PostUpdateNodes::ExprPostUpdateNode
334-
or
335-
// Nodes that can't be reached from another entry definition or expression.
336-
not reachedFromExprOrEntrySsaDef(n)
337-
or
338-
// Ensure all entry SSA definitions are local sources -- for parameters, this
339-
// is needed by type tracking. Note that when the parameter has a default value,
340-
// it will be reachable from an expression (the default value) and therefore
341-
// won't be caught by the rule above.
342-
entrySsaDefinition(n)
335+
not n instanceof SynthHashSplatParameterNode and
336+
(
337+
n instanceof ParameterNode
338+
or
339+
n instanceof PostUpdateNodes::ExprPostUpdateNode
340+
or
341+
// Nodes that can't be reached from another entry definition or expression.
342+
not reachedFromExprOrEntrySsaDef(n)
343+
or
344+
// Ensure all entry SSA definitions are local sources -- for parameters, this
345+
// is needed by type tracking. Note that when the parameter has a default value,
346+
// it will be reachable from an expression (the default value) and therefore
347+
// won't be caught by the rule above.
348+
entrySsaDefinition(n)
349+
)
343350
}
344351

345352
cached
@@ -415,7 +422,9 @@ predicate nodeIsHidden(Node n) {
415422
or
416423
n instanceof SynthReturnNode
417424
or
418-
n instanceof HashSplatArgumentsNode
425+
n instanceof SynthHashSplatParameterNode
426+
or
427+
n instanceof SynthHashSplatArgumentNode
419428
}
420429

421430
/** An SSA definition, viewed as a node in a data flow graph. */
@@ -470,10 +479,13 @@ private module ParameterNodes {
470479
abstract class ParameterNodeImpl extends NodeImpl {
471480
abstract Parameter getParameter();
472481

473-
abstract predicate isSourceParameterOf(Callable c, ParameterPosition pos);
482+
abstract predicate isParameterOf(DataFlowCallable c, ParameterPosition pos);
474483

475-
predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
476-
this.isSourceParameterOf(c.asCallable(), pos)
484+
final predicate isSourceParameterOf(Callable c, ParameterPosition pos) {
485+
exists(DataFlowCallable callable |
486+
this.isParameterOf(callable, pos) and
487+
c = callable.asCallable()
488+
)
477489
}
478490
}
479491

@@ -488,21 +500,23 @@ private module ParameterNodes {
488500

489501
override Parameter getParameter() { result = parameter }
490502

491-
override predicate isSourceParameterOf(Callable c, ParameterPosition pos) {
492-
exists(int i | pos.isPositional(i) and c.getParameter(i) = parameter |
493-
parameter instanceof SimpleParameter
503+
override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
504+
exists(Callable callable | callable = c.asCallable() |
505+
exists(int i | pos.isPositional(i) and callable.getParameter(i) = parameter |
506+
parameter instanceof SimpleParameter
507+
or
508+
parameter instanceof OptionalParameter
509+
)
510+
or
511+
parameter =
512+
any(KeywordParameter kp |
513+
callable.getAParameter() = kp and
514+
pos.isKeyword(kp.getName())
515+
)
494516
or
495-
parameter instanceof OptionalParameter
517+
parameter = callable.getAParameter().(HashSplatParameter) and
518+
pos.isHashSplat()
496519
)
497-
or
498-
parameter =
499-
any(KeywordParameter kp |
500-
c.getAParameter() = kp and
501-
pos.isKeyword(kp.getName())
502-
)
503-
or
504-
parameter = c.getAParameter().(HashSplatParameter) and
505-
pos.isHashSplat()
506520
}
507521

508522
override CfgScope getCfgScope() { result = parameter.getCallable() }
@@ -525,8 +539,8 @@ private module ParameterNodes {
525539

526540
override Parameter getParameter() { none() }
527541

528-
override predicate isSourceParameterOf(Callable c, ParameterPosition pos) {
529-
method = c and pos.isSelf()
542+
override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
543+
method = c.asCallable() and pos.isSelf()
530544
}
531545

532546
override CfgScope getCfgScope() { result = method }
@@ -551,8 +565,8 @@ private module ParameterNodes {
551565
result = method.getAParameter() and result instanceof BlockParameter
552566
}
553567

554-
override predicate isSourceParameterOf(Callable c, ParameterPosition pos) {
555-
c = method and pos.isBlock()
568+
override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
569+
c.asCallable() = method and pos.isBlock()
556570
}
557571

558572
override CfgScope getCfgScope() { result = method }
@@ -570,6 +584,73 @@ private module ParameterNodes {
570584
}
571585
}
572586

587+
/**
588+
* For all methods containing keyword parameters, we construct a synthesized
589+
* (hidden) parameter node to contain all keyword arguments. This allows us
590+
* to handle cases like
591+
*
592+
* ```rb
593+
* def foo(p1:, p2:)
594+
* sink(p1)
595+
* sink(p2)
596+
* end
597+
*
598+
* args = {:p1 => taint(1), :p2 => taint(2) }
599+
* foo(**args)
600+
* ```
601+
*
602+
* by adding read steps out of the synthesized parameter node to the relevant
603+
* keyword parameters.
604+
*
605+
* Note that this will introduce a bit of redundancy in cases like
606+
*
607+
* ```rb
608+
* foo(p1: taint(1), p2: taint(2))
609+
* ```
610+
*
611+
* where direct keyword matching is possible, since we construct a synthesized hash
612+
* splat argument (`SynthHashSplatArgumentNode`) at the call site, which means that
613+
* `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
614+
* nodes (and similarly for `p2`). However, this redunancy is OK since
615+
* (a) it means that type-tracking through keyword arguments also works in most cases,
616+
* (b) read/store steps can be avoided when direct keyword matching is possible, and
617+
* hence access path limits are not a concern, and
618+
* (c) since the synthesized nodes are hidden, the reported data-flow paths will be
619+
* collapsed anyway.
620+
*/
621+
class SynthHashSplatParameterNode extends ParameterNodeImpl, TSynthHashSplatParameterNode {
622+
private DataFlowCallable callable;
623+
624+
SynthHashSplatParameterNode() { this = TSynthHashSplatParameterNode(callable) }
625+
626+
/**
627+
* Gets a keyword parameter that will be the result of reading `c` out of this
628+
* synthesized node.
629+
*/
630+
ParameterNode getAKeywordParameter(ContentSet c) {
631+
exists(string name |
632+
isParameterNode(result, callable, any(ParameterPosition p | p.isKeyword(name)))
633+
|
634+
c = getKeywordContent(name) or
635+
c.isSingleton(TUnknownElementContent())
636+
)
637+
}
638+
639+
final override Parameter getParameter() { none() }
640+
641+
final override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
642+
c = callable and pos.isHashSplat()
643+
}
644+
645+
final override CfgScope getCfgScope() { result = callable.asCallable() }
646+
647+
final override DataFlowCallable getEnclosingCallable() { result = callable }
648+
649+
final override Location getLocationImpl() { result = callable.getLocation() }
650+
651+
final override string toStringImpl() { result = "**kwargs" }
652+
}
653+
573654
/** A parameter for a library callable with a flow summary. */
574655
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
575656
private FlowSummaryImpl::Public::SummarizedCallable sc;
@@ -579,8 +660,6 @@ private module ParameterNodes {
579660

580661
override Parameter getParameter() { none() }
581662

582-
override predicate isSourceParameterOf(Callable c, ParameterPosition pos) { none() }
583-
584663
override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
585664
sc = c.asLibraryCallable() and pos = pos_
586665
}
@@ -689,10 +768,10 @@ private module ArgumentNodes {
689768
* part of the method signature, such that those cannot end up in the hash-splat
690769
* parameter.
691770
*/
692-
class HashSplatArgumentsNode extends ArgumentNode, THashSplatArgumentsNode {
771+
class SynthHashSplatArgumentNode extends ArgumentNode, TSynthHashSplatArgumentNode {
693772
CfgNodes::ExprNodes::CallCfgNode c;
694773

695-
HashSplatArgumentsNode() { this = THashSplatArgumentsNode(c) }
774+
SynthHashSplatArgumentNode() { this = TSynthHashSplatArgumentNode(c) }
696775

697776
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
698777
this.sourceArgumentOf(call.asCall(), pos)
@@ -704,10 +783,10 @@ private module ArgumentNodes {
704783
}
705784
}
706785

707-
private class HashSplatArgumentsNodeImpl extends NodeImpl, THashSplatArgumentsNode {
786+
private class SynthHashSplatArgumentNodeImpl extends NodeImpl, TSynthHashSplatArgumentNode {
708787
CfgNodes::ExprNodes::CallCfgNode c;
709788

710-
HashSplatArgumentsNodeImpl() { this = THashSplatArgumentsNode(c) }
789+
SynthHashSplatArgumentNodeImpl() { this = TSynthHashSplatArgumentNode(c) }
711790

712791
override CfgScope getCfgScope() { result = c.getExpr().getCfgScope() }
713792

@@ -929,7 +1008,7 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
9291008
or
9301009
// Wrap all keyword arguments in a synthesized hash-splat argument node
9311010
exists(CfgNodes::ExprNodes::CallCfgNode call, ArgumentPosition keywordPos, string name |
932-
node2 = THashSplatArgumentsNode(call) and
1011+
node2 = TSynthHashSplatArgumentNode(call) and
9331012
node1.asExpr().(Argument).isArgumentOf(call, keywordPos) and
9341013
keywordPos.isKeyword(name) and
9351014
c = getKeywordContent(name)
@@ -962,6 +1041,8 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
9621041
))
9631042
)
9641043
or
1044+
node2 = node1.(SynthHashSplatParameterNode).getAKeywordParameter(c)
1045+
or
9651046
FlowSummaryImpl::Private::Steps::summaryReadStep(node1, c, node2)
9661047
}
9671048

ruby/ql/test/library-tests/dataflow/params/params-flow.expected

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,16 @@ edges
2626
| params_flow.rb:35:12:35:20 | call to taint : | params_flow.rb:25:12:25:13 | p1 : |
2727
| params_flow.rb:35:23:35:28 | ** ... [element :p3] : | params_flow.rb:25:17:25:24 | **kwargs [element :p3] : |
2828
| params_flow.rb:35:25:35:28 | args [element :p3] : | params_flow.rb:35:23:35:28 | ** ... [element :p3] : |
29+
| params_flow.rb:37:16:37:24 | call to taint : | params_flow.rb:38:10:38:13 | args [element :p1] : |
30+
| params_flow.rb:37:34:37:42 | call to taint : | params_flow.rb:38:10:38:13 | args [element :p2] : |
31+
| params_flow.rb:38:8:38:13 | ** ... [element :p1] : | params_flow.rb:25:12:25:13 | p1 : |
32+
| params_flow.rb:38:8:38:13 | ** ... [element :p2] : | params_flow.rb:25:17:25:24 | **kwargs [element :p2] : |
33+
| params_flow.rb:38:10:38:13 | args [element :p1] : | params_flow.rb:38:8:38:13 | ** ... [element :p1] : |
34+
| params_flow.rb:38:10:38:13 | args [element :p2] : | params_flow.rb:38:8:38:13 | ** ... [element :p2] : |
35+
| params_flow.rb:40:16:40:24 | call to taint : | params_flow.rb:41:26:41:29 | args [element :p1] : |
36+
| params_flow.rb:41:13:41:21 | call to taint : | params_flow.rb:16:18:16:19 | p2 : |
37+
| params_flow.rb:41:24:41:29 | ** ... [element :p1] : | params_flow.rb:16:13:16:14 | p1 : |
38+
| params_flow.rb:41:26:41:29 | args [element :p1] : | params_flow.rb:41:24:41:29 | ** ... [element :p1] : |
2939
nodes
3040
| params_flow.rb:9:16:9:17 | p1 : | semmle.label | p1 : |
3141
| params_flow.rb:9:20:9:21 | p2 : | semmle.label | p2 : |
@@ -60,18 +70,32 @@ nodes
6070
| params_flow.rb:35:12:35:20 | call to taint : | semmle.label | call to taint : |
6171
| params_flow.rb:35:23:35:28 | ** ... [element :p3] : | semmle.label | ** ... [element :p3] : |
6272
| params_flow.rb:35:25:35:28 | args [element :p3] : | semmle.label | args [element :p3] : |
73+
| params_flow.rb:37:16:37:24 | call to taint : | semmle.label | call to taint : |
74+
| params_flow.rb:37:34:37:42 | call to taint : | semmle.label | call to taint : |
75+
| params_flow.rb:38:8:38:13 | ** ... [element :p1] : | semmle.label | ** ... [element :p1] : |
76+
| params_flow.rb:38:8:38:13 | ** ... [element :p2] : | semmle.label | ** ... [element :p2] : |
77+
| params_flow.rb:38:10:38:13 | args [element :p1] : | semmle.label | args [element :p1] : |
78+
| params_flow.rb:38:10:38:13 | args [element :p2] : | semmle.label | args [element :p2] : |
79+
| params_flow.rb:40:16:40:24 | call to taint : | semmle.label | call to taint : |
80+
| params_flow.rb:41:13:41:21 | call to taint : | semmle.label | call to taint : |
81+
| params_flow.rb:41:24:41:29 | ** ... [element :p1] : | semmle.label | ** ... [element :p1] : |
82+
| params_flow.rb:41:26:41:29 | args [element :p1] : | semmle.label | args [element :p1] : |
6383
subpaths
6484
#select
6585
| params_flow.rb:10:10:10:11 | p1 | params_flow.rb:14:12:14:19 | call to taint : | params_flow.rb:10:10:10:11 | p1 | $@ | params_flow.rb:14:12:14:19 | call to taint : | call to taint : |
6686
| params_flow.rb:11:10:11:11 | p2 | params_flow.rb:14:22:14:29 | call to taint : | params_flow.rb:11:10:11:11 | p2 | $@ | params_flow.rb:14:22:14:29 | call to taint : | call to taint : |
6787
| params_flow.rb:17:10:17:11 | p1 | params_flow.rb:21:13:21:20 | call to taint : | params_flow.rb:17:10:17:11 | p1 | $@ | params_flow.rb:21:13:21:20 | call to taint : | call to taint : |
6888
| params_flow.rb:17:10:17:11 | p1 | params_flow.rb:22:27:22:34 | call to taint : | params_flow.rb:17:10:17:11 | p1 | $@ | params_flow.rb:22:27:22:34 | call to taint : | call to taint : |
6989
| params_flow.rb:17:10:17:11 | p1 | params_flow.rb:23:33:23:40 | call to taint : | params_flow.rb:17:10:17:11 | p1 | $@ | params_flow.rb:23:33:23:40 | call to taint : | call to taint : |
90+
| params_flow.rb:17:10:17:11 | p1 | params_flow.rb:40:16:40:24 | call to taint : | params_flow.rb:17:10:17:11 | p1 | $@ | params_flow.rb:40:16:40:24 | call to taint : | call to taint : |
7091
| params_flow.rb:18:10:18:11 | p2 | params_flow.rb:21:27:21:34 | call to taint : | params_flow.rb:18:10:18:11 | p2 | $@ | params_flow.rb:21:27:21:34 | call to taint : | call to taint : |
7192
| params_flow.rb:18:10:18:11 | p2 | params_flow.rb:22:13:22:20 | call to taint : | params_flow.rb:18:10:18:11 | p2 | $@ | params_flow.rb:22:13:22:20 | call to taint : | call to taint : |
7293
| params_flow.rb:18:10:18:11 | p2 | params_flow.rb:23:16:23:23 | call to taint : | params_flow.rb:18:10:18:11 | p2 | $@ | params_flow.rb:23:16:23:23 | call to taint : | call to taint : |
94+
| params_flow.rb:18:10:18:11 | p2 | params_flow.rb:41:13:41:21 | call to taint : | params_flow.rb:18:10:18:11 | p2 | $@ | params_flow.rb:41:13:41:21 | call to taint : | call to taint : |
7395
| params_flow.rb:26:10:26:11 | p1 | params_flow.rb:33:12:33:19 | call to taint : | params_flow.rb:26:10:26:11 | p1 | $@ | params_flow.rb:33:12:33:19 | call to taint : | call to taint : |
7496
| params_flow.rb:26:10:26:11 | p1 | params_flow.rb:35:12:35:20 | call to taint : | params_flow.rb:26:10:26:11 | p1 | $@ | params_flow.rb:35:12:35:20 | call to taint : | call to taint : |
97+
| params_flow.rb:26:10:26:11 | p1 | params_flow.rb:37:16:37:24 | call to taint : | params_flow.rb:26:10:26:11 | p1 | $@ | params_flow.rb:37:16:37:24 | call to taint : | call to taint : |
7598
| params_flow.rb:28:10:28:22 | ( ... ) | params_flow.rb:33:26:33:34 | call to taint : | params_flow.rb:28:10:28:22 | ( ... ) | $@ | params_flow.rb:33:26:33:34 | call to taint : | call to taint : |
99+
| params_flow.rb:28:10:28:22 | ( ... ) | params_flow.rb:37:34:37:42 | call to taint : | params_flow.rb:28:10:28:22 | ( ... ) | $@ | params_flow.rb:37:34:37:42 | call to taint : | call to taint : |
76100
| params_flow.rb:29:10:29:22 | ( ... ) | params_flow.rb:33:41:33:49 | call to taint : | params_flow.rb:29:10:29:22 | ( ... ) | $@ | params_flow.rb:33:41:33:49 | call to taint : | call to taint : |
77101
| params_flow.rb:29:10:29:22 | ( ... ) | params_flow.rb:34:14:34:22 | call to taint : | params_flow.rb:29:10:29:22 | ( ... ) | $@ | params_flow.rb:34:14:34:22 | call to taint : | call to taint : |

ruby/ql/test/library-tests/dataflow/params/params_flow.rb

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,28 @@ def positional(p1, p2)
1414
positional(taint(1), taint(2))
1515

1616
def keyword(p1:, p2:)
17-
sink p1 # $ hasValueFlow=3 $ hasValueFlow=6 $ hasValueFlow=8
18-
sink p2 # $ hasValueFlow=4 $ hasValueFlow=5 $ hasValueFlow=7
17+
sink p1 # $ hasValueFlow=3 $ hasValueFlow=6 $ hasValueFlow=8 $ hasValueFlow=16
18+
sink p2 # $ hasValueFlow=4 $ hasValueFlow=5 $ hasValueFlow=7 $ hasValueFlow=17
1919
end
2020

2121
keyword(p1: taint(3), p2: taint(4))
2222
keyword(p2: taint(5), p1: taint(6))
2323
keyword(:p2 => taint(7), :p1 => taint(8))
2424

2525
def kwargs(p1:, **kwargs)
26-
sink p1 # $ hasValueFlow=9 $ hasValueFlow=13
26+
sink p1 # $ hasValueFlow=9 $ hasValueFlow=13 $ hasValueFlow=14
2727
sink (kwargs[:p1])
28-
sink (kwargs[:p2]) # $ hasValueFlow=10
28+
sink (kwargs[:p2]) # $ hasValueFlow=10 $ hasValueFlow=15
2929
sink (kwargs[:p3]) # $ hasValueFlow=11 $ hasValueFlow=12
3030
sink (kwargs[:p4])
3131
end
3232

3333
kwargs(p1: taint(9), p2: taint(10), p3: taint(11), p4: "")
3434
args = { p3: taint(12), p4: "" }
3535
kwargs(p1: taint(13), **args)
36+
37+
args = {:p1 => taint(14), :p2 => taint(15) }
38+
kwargs(**args)
39+
40+
args = {:p1 => taint(16) }
41+
keyword(p2: taint(17), **args)

0 commit comments

Comments
 (0)