Skip to content

Commit 02c4966

Browse files
authored
Merge pull request #7878 from asgerf/dot-separated-access-paths
Shared: Switch to dot-separated access paths in summary specs
2 parents 9196b64 + 7848fce commit 02c4966

File tree

101 files changed

+9887
-9627
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+9887
-9627
lines changed

config/identical-files.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,5 +502,11 @@
502502
"javascript/ql/lib/tutorial.qll",
503503
"python/ql/lib/tutorial.qll",
504504
"ruby/ql/lib/tutorial.qll"
505+
],
506+
"AccessPathSyntax": [
507+
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll",
508+
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
509+
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
510+
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
505511
]
506512
}

csharp/ql/lib/semmle/code/csharp/dataflow/ExternalFlow.qll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
*/
7373

7474
import csharp
75+
private import internal.AccessPathSyntax
7576
private import internal.DataFlowDispatch
7677
private import internal.DataFlowPrivate
7778
private import internal.DataFlowPublic
@@ -300,7 +301,7 @@ module CsvValidation {
300301
msg = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model."
301302
)
302303
or
303-
exists(string pred, string input, string part |
304+
exists(string pred, AccessPath input, string part |
304305
sinkModel(_, _, _, _, _, _, input, _) and pred = "sink"
305306
or
306307
summaryModel(_, _, _, _, _, _, input, _, _) and pred = "summary"
@@ -311,7 +312,7 @@ module CsvValidation {
311312
not (part = "Argument" and pred = "sink") and
312313
not parseArg(part, _)
313314
or
314-
specSplit(input, part, _) and
315+
part = input.getToken(_) and
315316
parseParam(part, _)
316317
) and
317318
msg = "Unrecognized input specification \"" + part + "\" in " + pred + " model."
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/**
2+
* Module for parsing access paths from CSV models, both the identifying access path used
3+
* by dynamic languages, and the input/output specifications for summary steps.
4+
*
5+
* This file is used by the shared data flow library and by the JavaScript libraries
6+
* (which does not use the shared data flow libraries).
7+
*/
8+
9+
/** Companion module to the `AccessPath` class. */
10+
module AccessPath {
11+
/** A string that should be parsed as an access path. */
12+
abstract class Range extends string {
13+
bindingset[this]
14+
Range() { any() }
15+
}
16+
}
17+
18+
/** Gets the `n`th token on the access path as a string. */
19+
private string getRawToken(AccessPath path, int n) {
20+
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
21+
// Instead use regexpFind to match valid tokens, and supplement with a final length
22+
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
23+
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
24+
}
25+
26+
/**
27+
* A string that occurs as an access path (either identifying or input/output spec)
28+
* which might be relevant for this database.
29+
*/
30+
class AccessPath extends string instanceof AccessPath::Range {
31+
/** Holds if this string is not a syntactically valid access path. */
32+
predicate hasSyntaxError() {
33+
// If the lengths match, all characters must haven been included in a token
34+
// or seen by the `.` lookahead pattern.
35+
this != "" and
36+
not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1
37+
}
38+
39+
/** Gets the `n`th token on the access path (if there are no syntax errors). */
40+
AccessPathToken getToken(int n) {
41+
result = getRawToken(this, n) and
42+
not this.hasSyntaxError()
43+
}
44+
45+
/** Gets the number of tokens on the path (if there are no syntax errors). */
46+
int getNumToken() {
47+
result = count(int n | exists(getRawToken(this, n))) and
48+
not this.hasSyntaxError()
49+
}
50+
}
51+
52+
/**
53+
* An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
54+
*/
55+
class AccessPathToken extends string {
56+
AccessPathToken() { this = getRawToken(any(AccessPath path), _) }
57+
58+
private string getPart(int part) {
59+
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
60+
}
61+
62+
/** Gets the name of the token, such as `Member` from `Member[x]` */
63+
string getName() { result = this.getPart(1) }
64+
65+
/**
66+
* Gets the argument list, such as `1,2` from `Member[1,2]`,
67+
* or has no result if there are no arguments.
68+
*/
69+
string getArgumentList() { result = this.getPart(2) }
70+
71+
/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
72+
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
73+
74+
/** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
75+
string getAnArgument() { result = this.getArgument(_) }
76+
77+
/** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
78+
int getNumArgument() { result = count(int n | exists(this.getArgument(n))) }
79+
}

csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll

Lines changed: 76 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ module Public {
9999
exists(SummaryComponent head, SummaryComponentStack tail |
100100
head = this.head() and
101101
tail = this.tail() and
102-
result = head + " of " + tail
102+
result = tail + "." + head
103103
)
104104
or
105105
exists(SummaryComponent c |
@@ -164,7 +164,7 @@ module Public {
164164
exists(SummaryComponent head, SummaryComponentStack tail |
165165
head = stack.head() and
166166
tail = stack.tail() and
167-
result = getComponentCsv(head) + " of " + getComponentStackCsv(tail)
167+
result = getComponentStackCsv(tail) + "." + getComponentCsv(head)
168168
)
169169
or
170170
exists(SummaryComponent c |
@@ -228,6 +228,7 @@ module Public {
228228
*/
229229
module Private {
230230
private import Public
231+
import AccessPathSyntax
231232

232233
newtype TSummaryComponent =
233234
TContentSummaryComponent(Content c) or
@@ -811,84 +812,60 @@ module Private {
811812
sinkElement(_, spec, _)
812813
}
813814

814-
/** Holds if the `n`th component of specification `s` is `c`. */
815-
predicate specSplit(string s, string c, int n) { relevantSpec(s) and s.splitAt(" of ", n) = c }
816-
817-
/** Holds if specification `s` has length `len`. */
818-
predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
819-
820-
/** Gets the last component of specification `s`. */
821-
string specLast(string s) {
822-
exists(int len |
823-
specLength(s, len) and
824-
specSplit(s, result, len - 1)
825-
)
815+
private class AccessPathRange extends AccessPath::Range {
816+
AccessPathRange() { relevantSpec(this) }
826817
}
827818

828819
/** Holds if specification component `c` parses as parameter `n`. */
829-
predicate parseParam(string c, ArgumentPosition pos) {
830-
specSplit(_, c, _) and
831-
exists(string body |
832-
body = c.regexpCapture("Parameter\\[([^\\]]*)\\]", 1) and
833-
pos = parseParamBody(body)
834-
)
820+
predicate parseParam(AccessPathToken token, ArgumentPosition pos) {
821+
token.getName() = "Parameter" and
822+
pos = parseParamBody(token.getAnArgument())
835823
}
836824

837825
/** Holds if specification component `c` parses as argument `n`. */
838-
predicate parseArg(string c, ParameterPosition pos) {
839-
specSplit(_, c, _) and
840-
exists(string body |
841-
body = c.regexpCapture("Argument\\[([^\\]]*)\\]", 1) and
842-
pos = parseArgBody(body)
843-
)
826+
predicate parseArg(AccessPathToken token, ParameterPosition pos) {
827+
token.getName() = "Argument" and
828+
pos = parseArgBody(token.getAnArgument())
844829
}
845830

846-
private SummaryComponent interpretComponent(string c) {
847-
specSplit(_, c, _) and
848-
(
849-
exists(ParameterPosition pos |
850-
parseArg(c, pos) and result = SummaryComponent::argument(pos)
851-
)
852-
or
853-
exists(ArgumentPosition pos |
854-
parseParam(c, pos) and result = SummaryComponent::parameter(pos)
855-
)
856-
or
857-
c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
858-
or
859-
result = interpretComponentSpecific(c)
831+
private SummaryComponent interpretComponent(AccessPathToken token) {
832+
exists(ParameterPosition pos |
833+
parseArg(token, pos) and result = SummaryComponent::argument(pos)
860834
)
835+
or
836+
exists(ArgumentPosition pos |
837+
parseParam(token, pos) and result = SummaryComponent::parameter(pos)
838+
)
839+
or
840+
token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
841+
or
842+
result = interpretComponentSpecific(token)
861843
}
862844

863845
/**
864846
* Holds if `spec` specifies summary component stack `stack`.
865847
*/
866-
predicate interpretSpec(string spec, SummaryComponentStack stack) {
867-
interpretSpec(spec, 0, stack)
848+
predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) {
849+
interpretSpec(spec, spec.getNumToken(), stack)
868850
}
869851

870-
private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
871-
exists(string c |
872-
relevantSpec(spec) and
873-
specLength(spec, idx + 1) and
874-
specSplit(spec, c, idx) and
875-
stack = SummaryComponentStack::singleton(interpretComponent(c))
876-
)
852+
/** Holds if the first `n` tokens of `spec` resolves to `stack`. */
853+
private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) {
854+
n = 1 and
855+
stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0)))
877856
or
878857
exists(SummaryComponent head, SummaryComponentStack tail |
879-
interpretSpec(spec, idx, head, tail) and
858+
interpretSpec(spec, n, head, tail) and
880859
stack = SummaryComponentStack::push(head, tail)
881860
)
882861
}
883862

863+
/** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */
884864
private predicate interpretSpec(
885-
string output, int idx, SummaryComponent head, SummaryComponentStack tail
865+
AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail
886866
) {
887-
exists(string c |
888-
interpretSpec(output, idx + 1, tail) and
889-
specSplit(output, c, idx) and
890-
head = interpretComponent(c)
891-
)
867+
interpretSpec(spec, n - 1, tail) and
868+
head = interpretComponent(spec.getToken(n - 1))
892869
}
893870

894871
private class MkStack extends RequiredSummaryComponentStack {
@@ -903,7 +880,7 @@ module Private {
903880
override predicate propagatesFlow(
904881
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
905882
) {
906-
exists(string inSpec, string outSpec, string kind |
883+
exists(AccessPath inSpec, AccessPath outSpec, string kind |
907884
summaryElement(this, inSpec, outSpec, kind) and
908885
interpretSpec(inSpec, input) and
909886
interpretSpec(outSpec, output)
@@ -916,50 +893,56 @@ module Private {
916893
}
917894

918895
/** Holds if component `c` of specification `spec` cannot be parsed. */
919-
predicate invalidSpecComponent(string spec, string c) {
920-
specSplit(spec, c, _) and
896+
predicate invalidSpecComponent(AccessPath spec, string c) {
897+
c = spec.getToken(_) and
921898
not exists(interpretComponent(c))
922899
}
923900

924-
private predicate inputNeedsReference(string c) {
925-
c = "Argument" or
926-
parseArg(c, _) or
901+
private predicate inputNeedsReference(AccessPathToken c) {
902+
c.getName() = "Argument" or
927903
inputNeedsReferenceSpecific(c)
928904
}
929905

930-
private predicate outputNeedsReference(string c) {
931-
c = "Argument" or
932-
parseArg(c, _) or
933-
c = "ReturnValue" or
906+
private predicate outputNeedsReference(AccessPathToken c) {
907+
c.getName() = ["Argument", "ReturnValue"] or
934908
outputNeedsReferenceSpecific(c)
935909
}
936910

937-
private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
911+
private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) {
938912
exists(SourceOrSinkElement e |
939913
sourceElement(e, output, kind) and
940-
if outputNeedsReference(specLast(output))
914+
if outputNeedsReference(output.getToken(0))
941915
then e = ref.getCallTarget()
942916
else e = ref.asElement()
943917
)
944918
}
945919

946-
private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
920+
private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) {
947921
exists(SourceOrSinkElement e |
948922
sinkElement(e, input, kind) and
949-
if inputNeedsReference(specLast(input))
923+
if inputNeedsReference(input.getToken(0))
950924
then e = ref.getCallTarget()
951925
else e = ref.asElement()
952926
)
953927
}
954928

955-
private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
929+
/** Holds if the first `n` tokens of `output` resolve to the given interpretation. */
930+
private predicate interpretOutput(
931+
AccessPath output, int n, InterpretNode ref, InterpretNode node
932+
) {
956933
sourceElementRef(ref, output, _) and
957-
specLength(output, idx) and
958-
node = ref
934+
n = 0 and
935+
(
936+
if output = ""
937+
then
938+
// Allow language-specific interpretation of the empty access path
939+
interpretOutputSpecific("", ref, node)
940+
else node = ref
941+
)
959942
or
960-
exists(InterpretNode mid, string c |
961-
interpretOutput(output, idx + 1, ref, mid) and
962-
specSplit(output, c, idx)
943+
exists(InterpretNode mid, AccessPathToken c |
944+
interpretOutput(output, n - 1, ref, mid) and
945+
c = output.getToken(n - 1)
963946
|
964947
exists(ArgumentPosition apos, ParameterPosition ppos |
965948
node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and
@@ -982,14 +965,21 @@ module Private {
982965
)
983966
}
984967

985-
private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
968+
/** Holds if the first `n` tokens of `input` resolve to the given interpretation. */
969+
private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) {
986970
sinkElementRef(ref, input, _) and
987-
specLength(input, idx) and
988-
node = ref
971+
n = 0 and
972+
(
973+
if input = ""
974+
then
975+
// Allow language-specific interpretation of the empty access path
976+
interpretInputSpecific("", ref, node)
977+
else node = ref
978+
)
989979
or
990-
exists(InterpretNode mid, string c |
991-
interpretInput(input, idx + 1, ref, mid) and
992-
specSplit(input, c, idx)
980+
exists(InterpretNode mid, AccessPathToken c |
981+
interpretInput(input, n - 1, ref, mid) and
982+
c = input.getToken(n - 1)
993983
|
994984
exists(ArgumentPosition apos, ParameterPosition ppos |
995985
node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and
@@ -1014,9 +1004,9 @@ module Private {
10141004
* model.
10151005
*/
10161006
predicate isSourceNode(InterpretNode node, string kind) {
1017-
exists(InterpretNode ref, string output |
1007+
exists(InterpretNode ref, AccessPath output |
10181008
sourceElementRef(ref, output, kind) and
1019-
interpretOutput(output, 0, ref, node)
1009+
interpretOutput(output, output.getNumToken(), ref, node)
10201010
)
10211011
}
10221012

@@ -1025,9 +1015,9 @@ module Private {
10251015
* model.
10261016
*/
10271017
predicate isSinkNode(InterpretNode node, string kind) {
1028-
exists(InterpretNode ref, string input |
1018+
exists(InterpretNode ref, AccessPath input |
10291019
sinkElementRef(ref, input, kind) and
1030-
interpretInput(input, 0, ref, node)
1020+
interpretInput(input, input.getNumToken(), ref, node)
10311021
)
10321022
}
10331023
}

0 commit comments

Comments
 (0)