Merge pull request #8433 from github/henrymercer/js-atm-remove-isEffectiveSinkWithOverridingScore

henrymercer · web-flow · commit f38b498eede3 · 2022-03-15T10:04:30.000Z
JS: Remove `isEffectiveSinkWithOverridingScore` from ML-powered libraries
diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll
@@ -62,28 +62,6 @@ abstract class AtmConfig extends string {
    */
   predicate isEffectiveSink(raw::DataFlow::Node candidateSink) { none() }
 
-  /**
-   * EXPERIMENTAL. This API may change in the future.
-   *
-   * Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
-   * an effective sink that overrides the score provided by the machine learning model with the
-   * score `score` for reason `why`. The effective sinks identified by this predicate MUST be a
-   * subset of those identified by the `isEffectiveSink` predicate.
-   *
-   * For example, in the ATM external API query, we use this method to ensure the ATM external API
-   * query produces the same results as the standard external API query, but assigns flows
-   * involving sinks that are filtered out by the endpoint filters a score of 0.
-   *
-   * This predicate can be phased out once we no longer need to rely on predicates like
-   * `paddedScore` in the ATM CodeQL libraries to add scores to alert messages in a way that works
-   * with lexical sort orders.
-   */
-  predicate isEffectiveSinkWithOverridingScore(
-    raw::DataFlow::Node candidateSink, float score, string why
-  ) {
-    none()
-  }
-
   /**
    * EXPERIMENTAL. This API may change in the future.
    *
diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
@@ -62,14 +62,11 @@ private float getScoreForSource(DataFlow::Node source) {
 private float getScoreForSink(DataFlow::Node sink) {
   if getCfg().isKnownSink(sink)
   then result = 1.0
-  else
-    if getCfg().isEffectiveSinkWithOverridingScore(sink, result, _)
-    then any()
-    else (
-      // This restriction on `sink` has no semantic effect but improves performance.
-      getCfg().isEffectiveSink(sink) and
-      ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(), result)
-    )
+  else (
+    // This restriction on `sink` has no semantic effect but improves performance.
+    getCfg().isEffectiveSink(sink) and
+    ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(), result)
+  )
 }
 
 class EndpointScoringResults extends ScoringResults {
@@ -109,10 +106,6 @@ class EndpointScoringResults extends ScoringResults {
     result = "known" and getCfg().isKnownSink(sink)
     or
     not getCfg().isKnownSink(sink) and
-    getCfg().isEffectiveSinkWithOverridingScore(sink, _, result)
-    or
-    not getCfg().isKnownSink(sink) and
-    not getCfg().isEffectiveSinkWithOverridingScore(sink, _, _) and
     result =
       "predicted (scores: " +
         concat(EndpointType type, float score |
@@ -127,29 +120,21 @@ class EndpointScoringResults extends ScoringResults {
   override predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
     if getCfg().isKnownSink(sink)
     then any()
-    else
-      if getCfg().isEffectiveSinkWithOverridingScore(sink, _, _)
-      then
-        exists(float score |
-          getCfg().isEffectiveSinkWithOverridingScore(sink, score, _) and
-          score >= getCfg().getScoreCutoff()
-        )
-      else (
-        // This restriction on `sink` has no semantic effect but improves performance.
-        getCfg().isEffectiveSink(sink) and
-        exists(float sinkScore |
-          ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(),
-            sinkScore) and
-          // Include the endpoint if (a) the query endpoint type scores higher than all other
-          // endpoint types, or (b) the query endpoint type scores at least
-          // 0.5 - (getCfg().getScoreCutoff() / 2).
-          sinkScore >=
-            [
-              max(float s | ModelScoring::endpointScores(sink, _, s)),
-              0.5 - getCfg().getScoreCutoff() / 2
-            ]
-        )
+    else (
+      // This restriction on `sink` has no semantic effect but improves performance.
+      getCfg().isEffectiveSink(sink) and
+      exists(float sinkScore |
+        ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(), sinkScore) and
+        // Include the endpoint if (a) the query endpoint type scores at least as high as every
+        // other endpoint type (ties count), or (b) the query endpoint type scores at least
+        // 0.5 - (getCfg().getScoreCutoff() / 2).
+        sinkScore >=
+          [
+            max(float s | ModelScoring::endpointScores(sink, _, s)),
+            0.5 - getCfg().getScoreCutoff() / 2
+          ]
       )
+    )
   }
 }
 
diff --git a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll
@@ -74,10 +74,7 @@ private DataFlow::Node getANotASink(NotASinkReason reason) {
  * specified query.
  */
 private DataFlow::Node getAnUnknown(Query query) {
-  (
-    getAtmCfg(query).isEffectiveSink(result) or
-    getAtmCfg(query).isEffectiveSinkWithOverridingScore(result, _, _)
-  ) and
+  getAtmCfg(query).isEffectiveSink(result) and
   not result = getASink(query) and
   // Only consider the source code for the project being analyzed.
   exists(result.getFile().getRelativePath())