Skip to content

Commit a8bfeba

Browse files
authored
Merge pull request #8149 from asgerf/shared/use-shared-access-path-syntax
Shared: use shared access path syntax to parse arguments in CSV rows
2 parents 0f125d1 + f1bfb31 commit a8bfeba

File tree

9 files changed

+422
-144
lines changed

9 files changed

+422
-144
lines changed

csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,111 @@
66
* (which does not use the shared data flow libraries).
77
*/
88

9+
/**
 * Holds if `regexp` matches `input` and `capture1` and `capture2` are the
 * contents of its first and second capture groups, respectively.
 *
 * This is a shorthand for two `regexpCapture` calls against the same pattern.
 */
bindingset[input, regexp]
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
  input.regexpCapture(regexp, 1) = capture1 and
  input.regexpCapture(regexp, 2) = capture2
}
17+
918
/** Companion module to the `AccessPath` class. */
module AccessPath {
  /** A string that should be parsed as an access path. */
  abstract class Range extends string {
    bindingset[this]
    Range() { any() }
  }

  /**
   * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value
   * of the constant or any value contained in the interval.
   *
   * Both the constant and the interval bounds may carry a leading `-`.
   */
  bindingset[arg]
  int parseInt(string arg) {
    result = arg.toInt()
    or
    // Match "n1..n2"
    exists(string lo, string hi |
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and
      result = [lo.toInt() .. hi.toInt()]
    )
  }

  /**
   * Parses a lower-bounded interval `n..` and gets the lower bound.
   *
   * The upper bound is left to the caller, which either treats the interval
   * as unbounded or caps it using a known arity.
   */
  bindingset[arg]
  private int parseLowerBound(string arg) {
    result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt()
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that explicitly
   * references the arity, such as `N-1` or `N-3..N-1`.
   *
   * The supported forms are `N-x`, `N-x..`, `x..N-y`, `N-x..N-y` and `N-x..y`,
   * where `N` stands for `arity`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  private int parseIntWithExplicitArity(string arg, int arity) {
    // Since `and` binds tighter than `or`, this check guards only the first
    // `exists` below; the arity-relative cases in the second `exists` repeat it.
    result >= 0 and // do not allow N-1 to resolve to a negative index
    exists(string lo |
      // N-x
      lo = arg.regexpCapture("N-(\\d+)", 1) and
      result = arity - lo.toInt()
      or
      // N-x..
      // A lower-bounded interval whose implicit upper bound is the last index, `N-1`.
      // This must be a range expression: a set literal `[a, b]` would only produce
      // the two end points and skip every index strictly between them.
      lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
      result = [arity - lo.toInt() .. arity - 1]
    )
    or
    exists(string lo, string hi |
      // x..N-y
      // The lower bound is written out explicitly (and may be negative), so it is not clamped.
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [lo.toInt() .. arity - hi.toInt()]
      or
      // N-x..N-y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. arity - hi.toInt()] and
      result >= 0
      or
      // N-x..y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. hi.toInt()] and
      result >= 0
    )
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) and gets any
   * of the integers contained within (of which there may be infinitely many).
   *
   * Because the result set may be infinite, `result` is part of the binding set:
   * callers must supply the candidate value to be checked against `arg`.
   *
   * Has no result for arguments involving an explicit arity, such as `N-1`.
   */
  bindingset[arg, result]
  int parseIntUnbounded(string arg) {
    result = parseInt(arg)
    or
    result >= parseLowerBound(arg)
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that
   * may reference the arity of a call, such as `N-1` or `N-3..N-1`.
   *
   * A lower-bounded interval `n..` is capped at `arity - 1`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  int parseIntWithArity(string arg, int arity) {
    result = parseInt(arg)
    or
    result in [parseLowerBound(arg) .. arity - 1]
    or
    result = parseIntWithExplicitArity(arg, arity)
  }
}
17115

18116
/** Gets the `n`th token on the access path as a string. */

csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -124,21 +124,26 @@ predicate sinkElement(Element e, string input, string kind) {
124124

125125
/** Gets the summary component for specification component `c`, if any. */
126126
bindingset[c]
127-
SummaryComponent interpretComponentSpecific(string c) {
127+
SummaryComponent interpretComponentSpecific(AccessPathToken c) {
128128
c = "Element" and result = SummaryComponent::content(any(ElementContent ec))
129129
or
130+
// Qualified names may contain commas, such as in `Tuple<,>`, so get the entire argument list
131+
// rather than an individual argument.
130132
exists(Field f |
131-
c.regexpCapture("Field\\[(.+)\\]", 1) = f.getQualifiedName() and
133+
c.getName() = "Field" and
134+
c.getArgumentList() = f.getQualifiedName() and
132135
result = SummaryComponent::content(any(FieldContent fc | fc.getField() = f))
133136
)
134137
or
135138
exists(Property p |
136-
c.regexpCapture("Property\\[(.+)\\]", 1) = p.getQualifiedName() and
139+
c.getName() = "Property" and
140+
c.getArgumentList() = p.getQualifiedName() and
137141
result = SummaryComponent::content(any(PropertyContent pc | pc.getProperty() = p))
138142
)
139143
or
140144
exists(SyntheticField f |
141-
c.regexpCapture("SyntheticField\\[(.+)\\]", 1) = f and
145+
c.getName() = "SyntheticField" and
146+
c.getArgumentList() = f and
142147
result = SummaryComponent::content(any(SyntheticFieldContent sfc | sfc.getField() = f))
143148
)
144149
}
@@ -253,21 +258,10 @@ predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode n) {
253258
)
254259
}
255260

256-
bindingset[s]
257-
private int parseIntegerPosition(string s) {
258-
result = s.regexpCapture("([0-9]+)", 1).toInt()
259-
or
260-
exists(int n1, int n2 |
261-
s.regexpCapture("([0-9]+)\\.\\.([0-9]+)", 1).toInt() = n1 and
262-
s.regexpCapture("([0-9]+)\\.\\.([0-9]+)", 2).toInt() = n2 and
263-
result in [n1 .. n2]
264-
)
265-
}
266-
267261
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
268262
bindingset[s]
269263
ArgumentPosition parseParamBody(string s) {
270-
result.getPosition() = parseIntegerPosition(s)
264+
result.getPosition() = AccessPath::parseInt(s)
271265
or
272266
s = "This" and
273267
result.isQualifier()
@@ -276,7 +270,7 @@ ArgumentPosition parseParamBody(string s) {
276270
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
277271
bindingset[s]
278272
ParameterPosition parseArgBody(string s) {
279-
result.getPosition() = parseIntegerPosition(s)
273+
result.getPosition() = AccessPath::parseInt(s)
280274
or
281275
s = "Qualifier" and
282276
result.isThisParameter()

java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,111 @@
66
* (which does not use the shared data flow libraries).
77
*/
88

9+
/**
 * Holds if `regexp` matches `input` and `capture1` and `capture2` are the
 * contents of its first and second capture groups, respectively.
 *
 * This is a shorthand for two `regexpCapture` calls against the same pattern.
 */
bindingset[input, regexp]
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
  input.regexpCapture(regexp, 1) = capture1 and
  input.regexpCapture(regexp, 2) = capture2
}
17+
918
/** Companion module to the `AccessPath` class. */
module AccessPath {
  /** A string that should be parsed as an access path. */
  abstract class Range extends string {
    bindingset[this]
    Range() { any() }
  }

  /**
   * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value
   * of the constant or any value contained in the interval.
   *
   * Both the constant and the interval bounds may carry a leading `-`.
   */
  bindingset[arg]
  int parseInt(string arg) {
    result = arg.toInt()
    or
    // Match "n1..n2"
    exists(string lo, string hi |
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and
      result = [lo.toInt() .. hi.toInt()]
    )
  }

  /**
   * Parses a lower-bounded interval `n..` and gets the lower bound.
   *
   * The upper bound is left to the caller, which either treats the interval
   * as unbounded or caps it using a known arity.
   */
  bindingset[arg]
  private int parseLowerBound(string arg) {
    result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt()
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that explicitly
   * references the arity, such as `N-1` or `N-3..N-1`.
   *
   * The supported forms are `N-x`, `N-x..`, `x..N-y`, `N-x..N-y` and `N-x..y`,
   * where `N` stands for `arity`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  private int parseIntWithExplicitArity(string arg, int arity) {
    // Since `and` binds tighter than `or`, this check guards only the first
    // `exists` below; the arity-relative cases in the second `exists` repeat it.
    result >= 0 and // do not allow N-1 to resolve to a negative index
    exists(string lo |
      // N-x
      lo = arg.regexpCapture("N-(\\d+)", 1) and
      result = arity - lo.toInt()
      or
      // N-x..
      // A lower-bounded interval whose implicit upper bound is the last index, `N-1`.
      // This must be a range expression: a set literal `[a, b]` would only produce
      // the two end points and skip every index strictly between them.
      lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
      result = [arity - lo.toInt() .. arity - 1]
    )
    or
    exists(string lo, string hi |
      // x..N-y
      // The lower bound is written out explicitly (and may be negative), so it is not clamped.
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [lo.toInt() .. arity - hi.toInt()]
      or
      // N-x..N-y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. arity - hi.toInt()] and
      result >= 0
      or
      // N-x..y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. hi.toInt()] and
      result >= 0
    )
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) and gets any
   * of the integers contained within (of which there may be infinitely many).
   *
   * Because the result set may be infinite, `result` is part of the binding set:
   * callers must supply the candidate value to be checked against `arg`.
   *
   * Has no result for arguments involving an explicit arity, such as `N-1`.
   */
  bindingset[arg, result]
  int parseIntUnbounded(string arg) {
    result = parseInt(arg)
    or
    result >= parseLowerBound(arg)
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that
   * may reference the arity of a call, such as `N-1` or `N-3..N-1`.
   *
   * A lower-bounded interval `n..` is capped at `arity - 1`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  int parseIntWithArity(string arg, int arity) {
    result = parseInt(arg)
    or
    result in [parseLowerBound(arg) .. arity - 1]
    or
    result = parseIntWithExplicitArity(arg, arity)
  }
}
17115

18116
/** Gets the `n`th token on the access path as a string. */

java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -200,21 +200,10 @@ predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode n) {
200200
)
201201
}
202202

203-
bindingset[s]
204-
private int parsePosition(string s) {
205-
result = s.regexpCapture("([-0-9]+)", 1).toInt()
206-
or
207-
exists(int n1, int n2 |
208-
s.regexpCapture("([-0-9]+)\\.\\.([0-9]+)", 1).toInt() = n1 and
209-
s.regexpCapture("([-0-9]+)\\.\\.([0-9]+)", 2).toInt() = n2 and
210-
result in [n1 .. n2]
211-
)
212-
}
213-
214203
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
215204
bindingset[s]
216-
ArgumentPosition parseParamBody(string s) { result = parsePosition(s) }
205+
ArgumentPosition parseParamBody(string s) { result = AccessPath::parseInt(s) }
217206

218207
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
219208
bindingset[s]
220-
ParameterPosition parseArgBody(string s) { result = parsePosition(s) }
209+
ParameterPosition parseArgBody(string s) { result = AccessPath::parseInt(s) }

0 commit comments

Comments
 (0)