Skip to content

Commit 6356b20

Browse files
committed
Ruby: port js/hardcoded-data-interpreted-as-code
1 parent edc8f6f commit 6356b20

File tree

9 files changed

+259
-0
lines changed

9 files changed

+259
-0
lines changed
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for reasoning about hard-coded
3+
* data being interpreted as code, as well as extension points for adding your
4+
* own.
5+
*/
6+
7+
private import codeql.ruby.DataFlow
8+
private import codeql.ruby.security.CodeInjectionCustomizations
9+
private import codeql.ruby.AST as AST
10+
11+
/**
12+
* Provides default sources, sinks and sanitizers for reasoning about hard-coded
13+
* data being interpreted as code, as well as extension points for adding your
14+
* own.
15+
*/
16+
module HardcodedDataInterpretedAsCode {
17+
/**
18+
* Flow states used to distinguish value-preserving flow from taint flow.
19+
*/
20+
module FlowState {
21+
/** Flow state used to track value-preserving flow. */
22+
DataFlow::FlowState data() { result = "data" }
23+
24+
/** Flow state used to tainted data (non-value preserving flow). */
25+
DataFlow::FlowState taint() { result = "taint" }
26+
}
27+
28+
/**
29+
* A data flow source for hard-coded data.
30+
*/
31+
abstract class Source extends DataFlow::Node {
32+
/** Gets a flow label for which this is a source. */
33+
DataFlow::FlowState getLabel() { result = FlowState::data() }
34+
}
35+
36+
/**
37+
* A data flow sink for code injection.
38+
*/
39+
abstract class Sink extends DataFlow::Node {
40+
/** Gets a description of what kind of sink this is. */
41+
abstract string getKind();
42+
43+
/** Gets a flow label for which this is a sink. */
44+
DataFlow::FlowState getLabel() {
45+
// We want to ignore value-flow and only consider taint-flow, since the
46+
// source is just a hex string, and evaluating that directly will just
47+
// cause a syntax error.
48+
result = FlowState::taint()
49+
}
50+
}
51+
52+
/** A sanitizer for hard-coded data. */
53+
abstract class Sanitizer extends DataFlow::Node { }
54+
55+
/**
56+
* A constant string consisting of eight or more hexadecimal characters (including at
57+
* least one digit), viewed as a source of hard-coded data that should not be
58+
* interpreted as code.
59+
*/
60+
private class DefaultSource extends Source, DataFlow::LocalSourceNode {
61+
DefaultSource() {
62+
exists(string val | this.asExpr().getConstantValue().isString(val) |
63+
val.regexpMatch("[0-9a-fA-F]{8,}") and
64+
val.regexpMatch(".*[0-9].*")
65+
)
66+
}
67+
}
68+
69+
/**
70+
* A code injection sink; hard-coded data should not flow here.
71+
*/
72+
private class DefaultCodeInjectionSink extends Sink {
73+
DefaultCodeInjectionSink() { this instanceof CodeInjection::Sink }
74+
75+
override string getKind() { result = "code" }
76+
}
77+
78+
/**
79+
* An argument to `require` path; hard-coded data should not flow here.
80+
*/
81+
private class RequireArgumentSink extends Sink {
82+
RequireArgumentSink() {
83+
exists(DataFlow::CallNode require |
84+
require.getReceiver().getExprNode().getExpr() instanceof AST::SelfVariableAccess and
85+
require.getMethodName() = "require"
86+
|
87+
this = require.getArgument(0)
88+
)
89+
}
90+
91+
override string getKind() { result = "an import path" }
92+
}
93+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* Provides a taint-tracking configuration for reasoning about hard-coded data
3+
* being interpreted as code.
4+
*
5+
* Note, for performance reasons: only import this file if
6+
* `HardcodedDataInterpretedAsCode::Configuration` is needed, otherwise
7+
* `HardcodedDataInterpretedAsCodeCustomizations` should be imported instead.
8+
*/
9+
10+
private import codeql.ruby.DataFlow
11+
private import codeql.ruby.TaintTracking
12+
private import codeql.ruby.dataflow.internal.TaintTrackingPrivate
13+
import HardcodedDataInterpretedAsCodeCustomizations::HardcodedDataInterpretedAsCode
14+
15+
/**
16+
* A taint-tracking configuration for reasoning about hard-coded data
17+
* being interpreted as code.
18+
*
19+
* We extend `DataFlow::Configuration` rather than
20+
* `TaintTracking::Configuration`, so that we can set the flow state to
21+
* `"taint"` on a taint step.
22+
*/
23+
class Configuration extends DataFlow::Configuration {
24+
Configuration() { this = "HardcodedDataInterpretedAsCode" }
25+
26+
override predicate isSource(DataFlow::Node source, DataFlow::FlowState label) {
27+
source.(Source).getLabel() = label
28+
}
29+
30+
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState label) {
31+
sink.(Sink).getLabel() = label
32+
}
33+
34+
override predicate isBarrier(DataFlow::Node node) {
35+
super.isBarrier(node) or
36+
node instanceof Sanitizer
37+
}
38+
39+
override predicate isAdditionalFlowStep(
40+
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
41+
DataFlow::FlowState stateTo
42+
) {
43+
defaultAdditionalTaintStep(nodeFrom, nodeTo) and
44+
// This is a taint step, so the flow state becomes `taint`.
45+
stateFrom = [FlowState::data(), FlowState::taint()] and
46+
stateTo = FlowState::taint()
47+
}
48+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `rb/hardcoded-data-interpreted-as-code`, to detect cases where hardcoded data is executed as code, a technique associated with backdoors.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
2+
<qhelp>
3+
4+
<overview>
5+
<p>
6+
Interpreting hard-coded data (such as string literals containing hexadecimal numbers)
7+
as code or as an import path is typical of malicious backdoor code that has been
8+
implanted into an otherwise trusted code base and is trying to hide its true purpose
9+
from casual readers or automated scanning tools.
10+
</p>
11+
</overview>
12+
13+
<recommendation>
14+
<p>
15+
Examine the code in question carefully to ascertain its provenance and its true purpose.
16+
If the code is benign, it should always be possible to rewrite it without relying
17+
on dynamically interpreting data as code, improving both clarity and safety.
18+
</p>
19+
</recommendation>
20+
21+
<example>
22+
<p>
23+
As an example of malicious code using this obfuscation technique, consider the
24+
following simplified Ruby version of a snippet of backdoor code that was
25+
discovered in a dependency of the popular JavaScript <code>event-stream</code>
26+
npm package:
27+
</p>
28+
<sample src="examples/HardcodedDataInterpretedAsCode.rb"/>
29+
<p>
30+
While this shows only the first few lines of code, it already looks very suspicious
31+
since it takes a hard-coded string literal, hex-decodes it and then uses it as an
32+
import path. The only reason to do so is to hide the name of the file being imported.
33+
</p>
34+
</example>
35+
36+
<references>
37+
<li>
38+
OWASP:
39+
<a href="https://www.owasp.org/index.php/Trojan_Horse">Trojan Horse</a>.
40+
</li>
41+
<li>
42+
The npm Blog:
43+
<a href="https://blog.npmjs.org/post/180565383195/details-about-the-event-stream-incident">Details about the event-stream incident</a>.
44+
</li>
45+
</references>
46+
47+
</qhelp>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* @name Hard-coded data interpreted as code
3+
* @description Transforming hard-coded data (such as hexadecimal constants) into code
4+
* to be executed is a technique often associated with backdoors and should
5+
* be avoided.
6+
* @kind path-problem
7+
* @problem.severity error
8+
* @security-severity 9.1
9+
* @precision medium
10+
* @id rb/hardcoded-data-interpreted-as-code
11+
* @tags security
12+
* external/cwe/cwe-506
13+
*/
14+
15+
import codeql.ruby.security.HardcodedDataInterpretedAsCodeQuery
16+
import codeql.ruby.DataFlow
17+
import DataFlow::PathGraph
18+
19+
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
20+
where cfg.hasFlowPath(source, sink)
21+
select sink.getNode(), source, sink,
22+
"Hard-coded data from $@ is interpreted as " + sink.getNode().(Sink).getKind() + ".",
23+
source.getNode(), "here"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
def e(r)
2+
[r].pack 'H*'
3+
end
4+
5+
# BAD: hexadecimal constant decoded and interpreted as import path
6+
require e("2e2f746573742f64617461")
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
edges
2+
| tst.rb:1:7:1:7 | r : | tst.rb:2:4:2:4 | r : |
3+
| tst.rb:2:4:2:4 | r : | tst.rb:2:3:2:15 | call to pack : |
4+
| tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | tst.rb:7:8:7:30 | totally_harmless_string : |
5+
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:1:7:1:7 | r : |
6+
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:7:6:7:31 | call to e |
7+
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:1:7:1:7 | r : |
8+
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:10:9:10:25 | call to e |
9+
nodes
10+
| tst.rb:1:7:1:7 | r : | semmle.label | r : |
11+
| tst.rb:2:3:2:15 | call to pack : | semmle.label | call to pack : |
12+
| tst.rb:2:4:2:4 | r : | semmle.label | r : |
13+
| tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | semmle.label | "707574732822636f646520696e6a6..." : |
14+
| tst.rb:7:6:7:31 | call to e | semmle.label | call to e |
15+
| tst.rb:7:8:7:30 | totally_harmless_string : | semmle.label | totally_harmless_string : |
16+
| tst.rb:10:9:10:25 | call to e | semmle.label | call to e |
17+
| tst.rb:10:11:10:24 | "666f6f626172" : | semmle.label | "666f6f626172" : |
18+
subpaths
19+
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:1:7:1:7 | r : | tst.rb:2:3:2:15 | call to pack : | tst.rb:7:6:7:31 | call to e |
20+
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:1:7:1:7 | r : | tst.rb:2:3:2:15 | call to pack : | tst.rb:10:9:10:25 | call to e |
21+
#select
22+
| tst.rb:7:6:7:31 | call to e | tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | tst.rb:7:6:7:31 | call to e | Hard-coded data from $@ is interpreted as code. | tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." | here |
23+
| tst.rb:10:9:10:25 | call to e | tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:10:9:10:25 | call to e | Hard-coded data from $@ is interpreted as an import path. | tst.rb:10:11:10:24 | "666f6f626172" | here |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
queries/security/cwe-506/HardcodedDataInterpretedAsCode.ql
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
def e(r)
2+
[r].pack 'H*'
3+
end
4+
5+
totally_harmless_string = '707574732822636f646520696e6a656374696f6e2229'
6+
7+
eval(e(totally_harmless_string)) # NOT OK: eval("puts('hello'")
8+
eval(totally_harmless_string) # OK: throws parse error
9+
10+
require e('666f6f626172') # NOT OK: require 'foobar'
11+
require '666f6f626172' # OK: no taint step between source and sink
12+
13+
x = 'deadbeef'
14+
require e(x) # OK: doesn't meet our criteria for being a source

0 commit comments

Comments
 (0)