Skip to content

Commit e622e51

Browse files
authored
Merge pull request #7800 from github/henrymercer/js-atm-add-model-building-pack
JS: Add model building pack for ML-powered queries
2 parents fb00a6c + 1460131 commit e622e51

30 files changed

+993
-0
lines changed

.codeqlmanifest.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"*/ql/examples/qlpack.yml",
77
"cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
88
"javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
9+
"javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml",
910
"javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
1011
"csharp/ql/campaigns/Solorigate/lib/qlpack.yml",
1112
"csharp/ql/campaigns/Solorigate/src/qlpack.yml",
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/**
2+
* @name Debug result inclusion
3+
* @description Use this query to understand why some alerts are included or excluded from the
4+
* results of boosted queries. The results for this query are the union of the alerts
5+
* generated by each boosted query. Each alert includes an explanation of why it was
6+
* included or excluded for each of the four security queries.
7+
* @kind problem
8+
* @problem.severity error
9+
* @id adaptive-threat-modeling/js/debug-result-inclusion
10+
*/
11+
12+
import javascript
13+
import experimental.adaptivethreatmodeling.ATMConfig
14+
import extraction.ExtractEndpointData
15+
16+
/**
 * Gets a reason why `sinkCandidate` is excluded as a sink for `query`.
 *
 * The reason is obtained from the `SinkEndpointFilter::getAReasonSinkExcluded` predicate of the
 * ATM module that matches the kind of `query` (NoSQL injection, SQL injection, tainted path or
 * XSS).
 */
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
  result = NosqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
  query instanceof NosqlInjectionQuery
  or
  result = SqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
  query instanceof SqlInjectionQuery
  or
  result = TaintedPathATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
  query instanceof TaintedPathQuery
  or
  result = XssATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
  query instanceof XssQuery
}
29+
30+
/**
 * Holds if `node` is a source for a data flow configuration of `query`, according to either the
 * one-argument or the two-argument `isSource` predicate of that configuration.
 */
private predicate isSourceCandidateKnownForQuery(DataFlow::Node node, Query query) {
  getDataFlowCfg(query).isSource(node)
  or
  getDataFlowCfg(query).isSource(node, _)
}

/**
 * Gets a description of how `query` treats the candidate alert from `sourceCandidate` to
 * `sinkCandidate`. The possible descriptions are:
 *
 * - `included`: the query's data flow configuration has flow from source to sink.
 * - `excluded[reason=known-sink]`: the ATM configuration reports the sink as a known sink.
 * - `excluded[reason=...]`: the sink endpoint filter gives a reason for excluding the sink.
 * - `no flow` / `not a known source`: the sink has no exclusion reason and there is no flow;
 *   the two are distinguished by whether the source candidate is a source for the query.
 *
 * More than one description may apply to the same candidate.
 */
pragma[inline]
string getDescriptionForAlertCandidate(
  DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate, Query query
) {
  // The query's configuration finds flow between the candidates.
  getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
  result = "included"
  or
  // The sink is reported as a known sink by the ATM configuration.
  getATMCfg(query).isKnownSink(sinkCandidate) and
  result = "excluded[reason=known-sink]"
  or
  // The sink endpoint filter supplies one description per exclusion reason.
  result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
  or
  // No exclusion reason and no flow: say whether the source candidate is a source at all.
  not getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
  not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
  (
    if isSourceCandidateKnownForQuery(sourceCandidate, query)
    then result = "no flow"
    else result = "not a known source"
  )
}
52+
53+
/**
 * Gets a single string describing how every `Query` treats the candidate alert from
 * `sourceCandidate` to `sinkCandidate`.
 *
 * Each entry has the form `<query name>: <description>`, and entries are joined with `", "`.
 */
pragma[inline]
string getDescriptionForAlert(DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate) {
  result =
    concat(Query q, string description |
      description = getDescriptionForAlertCandidate(sourceCandidate, sinkCandidate, q)
    |
      q.getName() + ": " + description, ", "
    )
}
62+
63+
// Report every flow found by any data flow configuration in scope, annotated with the
// per-query inclusion/exclusion description.
from DataFlow::Node source, DataFlow::Node sink
where any(DataFlow::Configuration cfg).hasFlow(source, sink)
select sink,
  "This is an ATM result that may depend on $@ [" + getDescriptionForAlert(source, sink) + "]",
  source, "a user-provided value"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
private import javascript
2+
private import extraction.Exclusions as Exclusions
3+
4+
/**
 * Holds if the flow from `source` to `sink` should be left out of the results of an end-to-end
 * evaluation query, which is the case when the file of either endpoint is excluded according to
 * `Exclusions::isFileExcluded`.
 */
pragma[inline]
predicate isFlowExcluded(DataFlow::Node source, DataFlow::Node sink) {
  Exclusions::isFileExcluded(source.getFile())
  or
  Exclusions::isFileExcluded(sink.getFile())
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
2+
* EndpointScoresIntegrationTest.ql
3+
*
4+
* Extract scores for each test endpoint that is an argument to a function call in the database.
5+
* This is used by integration tests to verify that QL and the modeling codebase agree on the scores
6+
* of a set of test endpoints.
7+
*/
8+
9+
import javascript
10+
import experimental.adaptivethreatmodeling.ATMConfig
11+
import experimental.adaptivethreatmodeling.FeaturizationConfig
12+
import experimental.adaptivethreatmodeling.EndpointScoring::ModelScoring as ModelScoring
13+
14+
/**
 * A featurization config whose endpoints to featurize are the arguments of call nodes.
 *
 * This should only be used in extraction queries and tests.
 */
class FunctionArgumentFeaturizationConfig extends FeaturizationConfig {
  FunctionArgumentFeaturizationConfig() { this = "FunctionArgumentFeaturization" }

  /** Gets an argument of some `DataFlow::CallNode`. */
  override DataFlow::Node getAnEndpointToFeaturize() {
    result = any(DataFlow::CallNode call).getAnArgument()
  }
}
26+
27+
// Expose the three-argument `ModelScoring::endpointScores` predicate as this query's
// `endpointScores` result relation.
query predicate endpointScores = ModelScoring::endpointScores/3;
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/**
2+
* ModelCheck.ql
3+
*
4+
* Returns checksums of ATM models.
5+
*/
6+
7+
/**
 * The `availableMlModels` template predicate.
 *
 * The evaluator populates this predicate with one row of metadata (checksum, language, name and
 * type) for each available machine learning model.
 */
external predicate availableMlModels(
  string modelChecksum, string modelLanguage, string modelName, string modelType
);

// Select the checksum of every available model whose language is "javascript".
select any(string checksum | availableMlModels(checksum, "javascript", _, _) | checksum)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/**
2+
* NosqlInjection.ql
3+
*
4+
* Version of the standard NoSQL injection query with an output relation ready to plug into the
5+
* evaluation pipeline.
6+
*/
7+
8+
import semmle.javascript.security.dataflow.NosqlInjection
9+
import EndToEndEvaluation as EndToEndEvaluation
10+
11+
// One row per flow found by the standard NoSQL injection configuration that is not excluded from
// end-to-end evaluation, with the location of the source followed by that of the sink.
from
  NosqlInjection::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* NosqlInjectionATM.ql
3+
*
4+
* Version of the boosted NoSQL injection query with an output relation ready to plug into the
5+
* evaluation pipeline.
6+
*/
7+
8+
import ATM::ResultsInfo
9+
import EndToEndEvaluation as EndToEndEvaluation
10+
import experimental.adaptivethreatmodeling.NosqlInjectionATM
11+
12+
// One row per flow found by a data flow configuration in scope, skipping flows that are excluded
// from end-to-end evaluation or for which `isFlowLikelyInBaseQuery` holds. Each row carries the
// source and sink locations and the score from `getScoreForFlow`; rows are ordered by descending
// score, then by location.
from
  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
where
  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
  score = getScoreForFlow(source, sink) and
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  not isFlowLikelyInBaseQuery(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/**
2+
* NosqlInjectionATMLite.ql
3+
*
4+
* Arbitrarily ranked version of the boosted NoSQL injection query with an output relation ready to
5+
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
6+
* filters, and (b) as a baseline to compare the model against.
7+
*/
8+
9+
import ATM::ResultsInfo
10+
import EndToEndEvaluation as EndToEndEvaluation
11+
import experimental.adaptivethreatmodeling.NosqlInjectionATM
12+
13+
// Same result shape as the scored boosted query, but every row gets the constant score 0, so the
// ranking is determined by the location columns alone.
from
  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
where
  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
  score = 0 and
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  not isFlowLikelyInBaseQuery(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/**
2+
* SqlInjection.ql
3+
*
4+
* Version of the standard SQL injection query with an output relation ready to plug into the
5+
* evaluation pipeline.
6+
*/
7+
8+
import semmle.javascript.security.dataflow.SqlInjection
9+
import EndToEndEvaluation as EndToEndEvaluation
10+
11+
// One row per flow found by the standard SQL injection configuration that is not excluded from
// end-to-end evaluation, with the location of the source followed by that of the sink.
from
  SqlInjection::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* SqlInjectionATM.ql
3+
*
4+
* Version of the boosted SQL injection query with an output relation ready to plug into the
5+
* evaluation pipeline.
6+
*/
7+
8+
import ATM::ResultsInfo
9+
import EndToEndEvaluation as EndToEndEvaluation
10+
import experimental.adaptivethreatmodeling.SqlInjectionATM
11+
12+
// One row per flow found by a data flow configuration in scope, skipping flows that are excluded
// from end-to-end evaluation or for which `isFlowLikelyInBaseQuery` holds. Each row carries the
// source and sink locations and the score from `getScoreForFlow`; rows are ordered by descending
// score, then by location.
from
  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
where
  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
  score = getScoreForFlow(source, sink) and
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  not isFlowLikelyInBaseQuery(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

0 commit comments

Comments
 (0)