Skip to content

Commit 898689f

Browse files
authored
Merge pull request #9896 from github/nickrolfe/hardcoded_code
Ruby: port js/hardcoded-data-interpreted-as-code
2 parents 7887f66 + 52d4655 commit 898689f

File tree

9 files changed

+287
-0
lines changed

9 files changed

+287
-0
lines changed
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for reasoning about hard-coded
3+
* data being interpreted as code, as well as extension points for adding your
4+
* own.
5+
*/
6+
7+
private import codeql.ruby.DataFlow
8+
private import codeql.ruby.security.CodeInjectionCustomizations
9+
private import codeql.ruby.AST as AST
10+
private import codeql.ruby.controlflow.CfgNodes
11+
12+
/**
13+
* Provides default sources, sinks and sanitizers for reasoning about hard-coded
14+
* data being interpreted as code, as well as extension points for adding your
15+
* own.
16+
*/
17+
module HardcodedDataInterpretedAsCode {
18+
/**
19+
* Flow states used to distinguish value-preserving flow from taint flow.
20+
*/
21+
module FlowState {
22+
/** Flow state used to track value-preserving flow. */
23+
DataFlow::FlowState data() { result = "data" }
24+
25+
/** Flow state used to tainted data (non-value preserving flow). */
26+
DataFlow::FlowState taint() { result = "taint" }
27+
}
28+
29+
/**
30+
* A data flow source for hard-coded data.
31+
*/
32+
abstract class Source extends DataFlow::Node {
33+
/** Gets a flow label for which this is a source. */
34+
DataFlow::FlowState getLabel() { result = FlowState::data() }
35+
}
36+
37+
/**
38+
* A data flow sink for code injection.
39+
*/
40+
abstract class Sink extends DataFlow::Node {
41+
/** Gets a description of what kind of sink this is. */
42+
abstract string getKind();
43+
44+
/** Gets a flow label for which this is a sink. */
45+
DataFlow::FlowState getLabel() {
46+
// We want to ignore value-flow and only consider taint-flow, since the
47+
// source is just a hex string, and evaluating that directly will just
48+
// cause a syntax error.
49+
result = FlowState::taint()
50+
}
51+
}
52+
53+
/** A sanitizer for hard-coded data. */
54+
abstract class Sanitizer extends DataFlow::Node { }
55+
56+
/**
57+
* A constant string consisting of eight or more hexadecimal characters (including at
58+
* least one digit), viewed as a source of hard-coded data that should not be
59+
* interpreted as code.
60+
*/
61+
private class HexStringSource extends Source {
62+
HexStringSource() {
63+
exists(string val |
64+
this.asExpr().(ExprNodes::StringLiteralCfgNode).getConstantValue().isString(val)
65+
|
66+
val.regexpMatch("[0-9a-fA-F]{8,}") and
67+
val.regexpMatch(".*[0-9].*")
68+
)
69+
}
70+
}
71+
72+
/**
73+
* A string literal whose raw text is made up entirely of `\x` escape
74+
* sequences, viewed as a source of hard-coded data that should not be
75+
* interpreted as code.
76+
*/
77+
private class HexEscapedStringSource extends Source {
78+
HexEscapedStringSource() {
79+
forex(StringComponentCfgNode c |
80+
c = this.asExpr().(ExprNodes::StringlikeLiteralCfgNode).getAComponent()
81+
|
82+
c.getNode().(AST::StringEscapeSequenceComponent).getRawText().matches("\\x%")
83+
)
84+
}
85+
}
86+
87+
/**
88+
* A code injection sink; hard-coded data should not flow here.
89+
*/
90+
private class DefaultCodeInjectionSink extends Sink {
91+
DefaultCodeInjectionSink() { this instanceof CodeInjection::Sink }
92+
93+
override string getKind() { result = "code" }
94+
}
95+
96+
/**
97+
* An argument to `require` path; hard-coded data should not flow here.
98+
*/
99+
private class RequireArgumentSink extends Sink {
100+
RequireArgumentSink() {
101+
exists(DataFlow::CallNode require |
102+
require.getReceiver().getExprNode().getExpr() instanceof AST::SelfVariableAccess and
103+
require.getMethodName() = "require"
104+
|
105+
this = require.getArgument(0)
106+
)
107+
}
108+
109+
override string getKind() { result = "an import path" }
110+
}
111+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* Provides a taint-tracking configuration for reasoning about hard-coded data
3+
* being interpreted as code.
4+
*
5+
* Note, for performance reasons: only import this file if
6+
* `HardcodedDataInterpretedAsCode::Configuration` is needed, otherwise
7+
* `HardcodedDataInterpretedAsCodeCustomizations` should be imported instead.
8+
*/
9+
10+
private import codeql.ruby.DataFlow
11+
private import codeql.ruby.TaintTracking
12+
private import codeql.ruby.dataflow.internal.TaintTrackingPrivate
13+
import HardcodedDataInterpretedAsCodeCustomizations::HardcodedDataInterpretedAsCode
14+
15+
/**
16+
* A taint-tracking configuration for reasoning about hard-coded data
17+
* being interpreted as code.
18+
*
19+
* We extend `DataFlow::Configuration` rather than
20+
* `TaintTracking::Configuration`, so that we can set the flow state to
21+
* `"taint"` on a taint step.
22+
*/
23+
class Configuration extends DataFlow::Configuration {
24+
Configuration() { this = "HardcodedDataInterpretedAsCode" }
25+
26+
override predicate isSource(DataFlow::Node source, DataFlow::FlowState label) {
27+
source.(Source).getLabel() = label
28+
}
29+
30+
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState label) {
31+
sink.(Sink).getLabel() = label
32+
}
33+
34+
override predicate isBarrier(DataFlow::Node node) {
35+
super.isBarrier(node) or
36+
node instanceof Sanitizer
37+
}
38+
39+
override predicate isAdditionalFlowStep(
40+
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
41+
DataFlow::FlowState stateTo
42+
) {
43+
defaultAdditionalTaintStep(nodeFrom, nodeTo) and
44+
// This is a taint step, so the flow state becomes `taint`.
45+
stateFrom = [FlowState::data(), FlowState::taint()] and
46+
stateTo = FlowState::taint()
47+
}
48+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `rb/hardcoded-data-interpreted-as-code`, to detect cases where hardcoded data is executed as code, a technique associated with backdoors.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
2+
<qhelp>
3+
4+
<overview>
5+
<p>
6+
Interpreting hard-coded data (such as string literals containing hexadecimal numbers)
7+
as code or as an import path is typical of malicious backdoor code that has been
8+
implanted into an otherwise trusted code base and is trying to hide its true purpose
9+
from casual readers or automated scanning tools.
10+
</p>
11+
</overview>
12+
13+
<recommendation>
14+
<p>
15+
Examine the code in question carefully to ascertain its provenance and its true purpose.
16+
If the code is benign, it should always be possible to rewrite it without relying
17+
on dynamically interpreting data as code, improving both clarity and safety.
18+
</p>
19+
</recommendation>
20+
21+
<example>
22+
<p>
23+
As an example of malicious code using this obfuscation technique, consider the
24+
following simplified Ruby version of a snippet of backdoor code that was
25+
discovered in a dependency of the popular JavaScript <code>event-stream</code>
26+
npm package:
27+
</p>
28+
<sample src="examples/HardcodedDataInterpretedAsCode.rb"/>
29+
<p>
30+
While this shows only the first few lines of code, it already looks very suspicious
31+
since it takes a hard-coded string literal, hex-decodes it and then uses it as an
32+
import path. The only reason to do so is to hide the name of the file being imported.
33+
</p>
34+
</example>
35+
36+
<references>
37+
<li>
38+
OWASP:
39+
<a href="https://www.owasp.org/index.php/Trojan_Horse">Trojan Horse</a>.
40+
</li>
41+
<li>
42+
The npm Blog:
43+
<a href="https://blog.npmjs.org/post/180565383195/details-about-the-event-stream-incident">Details about the event-stream incident</a>.
44+
</li>
45+
</references>
46+
47+
</qhelp>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* @name Hard-coded data interpreted as code
3+
* @description Transforming hard-coded data (such as hexadecimal constants) into code
4+
* to be executed is a technique often associated with backdoors and should
5+
* be avoided.
6+
* @kind path-problem
7+
* @problem.severity error
8+
* @security-severity 9.1
9+
* @precision medium
10+
* @id rb/hardcoded-data-interpreted-as-code
11+
* @tags security
12+
* external/cwe/cwe-506
13+
*/
14+
15+
import codeql.ruby.security.HardcodedDataInterpretedAsCodeQuery
16+
import codeql.ruby.DataFlow
17+
import DataFlow::PathGraph
18+
19+
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
20+
where cfg.hasFlowPath(source, sink)
21+
select sink.getNode(), source, sink,
22+
"Hard-coded data from $@ is interpreted as " + sink.getNode().(Sink).getKind() + ".",
23+
source.getNode(), "here"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
def e(r)
2+
[r].pack 'H*'
3+
end
4+
5+
# BAD: hexadecimal constant decoded and interpreted as import path
6+
require e("2e2f746573742f64617461")
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
edges
2+
| tst.rb:1:7:1:7 | r : | tst.rb:2:4:2:4 | r : |
3+
| tst.rb:2:4:2:4 | r : | tst.rb:2:3:2:15 | call to pack : |
4+
| tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | tst.rb:7:8:7:30 | totally_harmless_string : |
5+
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:1:7:1:7 | r : |
6+
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:7:6:7:31 | call to e |
7+
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:1:7:1:7 | r : |
8+
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:10:9:10:25 | call to e |
9+
| tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | tst.rb:17:6:17:32 | another_questionable_string : |
10+
| tst.rb:17:6:17:32 | another_questionable_string : | tst.rb:17:6:17:38 | call to strip |
11+
nodes
12+
| tst.rb:1:7:1:7 | r : | semmle.label | r : |
13+
| tst.rb:2:3:2:15 | call to pack : | semmle.label | call to pack : |
14+
| tst.rb:2:4:2:4 | r : | semmle.label | r : |
15+
| tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | semmle.label | "707574732822636f646520696e6a6..." : |
16+
| tst.rb:7:6:7:31 | call to e | semmle.label | call to e |
17+
| tst.rb:7:8:7:30 | totally_harmless_string : | semmle.label | totally_harmless_string : |
18+
| tst.rb:10:9:10:25 | call to e | semmle.label | call to e |
19+
| tst.rb:10:11:10:24 | "666f6f626172" : | semmle.label | "666f6f626172" : |
20+
| tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | semmle.label | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : |
21+
| tst.rb:17:6:17:32 | another_questionable_string : | semmle.label | another_questionable_string : |
22+
| tst.rb:17:6:17:38 | call to strip | semmle.label | call to strip |
23+
subpaths
24+
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:1:7:1:7 | r : | tst.rb:2:3:2:15 | call to pack : | tst.rb:7:6:7:31 | call to e |
25+
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:1:7:1:7 | r : | tst.rb:2:3:2:15 | call to pack : | tst.rb:10:9:10:25 | call to e |
26+
#select
27+
| tst.rb:7:6:7:31 | call to e | tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | tst.rb:7:6:7:31 | call to e | Hard-coded data from $@ is interpreted as code. | tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." | here |
28+
| tst.rb:10:9:10:25 | call to e | tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:10:9:10:25 | call to e | Hard-coded data from $@ is interpreted as an import path. | tst.rb:10:11:10:24 | "666f6f626172" | here |
29+
| tst.rb:17:6:17:38 | call to strip | tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | tst.rb:17:6:17:38 | call to strip | Hard-coded data from $@ is interpreted as code. | tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." | here |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
queries/security/cwe-506/HardcodedDataInterpretedAsCode.ql
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
def e(r)
2+
[r].pack 'H*'
3+
end
4+
5+
totally_harmless_string = '707574732822636f646520696e6a656374696f6e2229'
6+
7+
eval(e(totally_harmless_string)) # NOT OK: eval("puts('hello'")
8+
eval(totally_harmless_string) # OK: throws parse error
9+
10+
require e('666f6f626172') # NOT OK: require 'foobar'
11+
require '666f6f626172' # OK: no taint step between source and sink
12+
13+
x = 'deadbeef'
14+
require e(x) # OK: doesn't meet our criteria for being a source
15+
16+
another_questionable_string = "\x70\x75\x74\x73\x28\x27\x68\x65\x6C\x6C\x6F\x27\x29"
17+
eval(another_questionable_string.strip) # NOT OK: eval("puts('hello'")
18+
eval(another_questionable_string) # OK: no taint step between source and sink

0 commit comments

Comments
 (0)