Skip to content

Commit 79f3da8

Browse files
authored
Merge pull request #8506 from michaelnebel/java/generalize-generate-flow-model
Java/C#: Generalize script for generating flow models.
2 parents 2014599 + bbe28bc commit 79f3da8

File tree

2 files changed

+211
-171
lines changed

2 files changed

+211
-171
lines changed
Lines changed: 8 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -1,178 +1,15 @@
11
#!/usr/bin/python3
22

3-
import json
4-
import os
3+
import sys
54
import os.path
6-
import shlex
75
import subprocess
8-
import sys
9-
import tempfile
10-
11-
12-
def printHelp():
13-
print("""Usage:
14-
GenerateFlowModel.py <library-database> <outputQll> [--with-sinks] [--with-sources] [--with-summaries]
15-
16-
This generates summary, source and sink models for the code in the database.
17-
The files will be placed in `java/ql/lib/semmle/code/java/frameworks/<outputQll>` where
18-
outputQll is the name (and path) of the output QLL file. Usually, models are grouped by their
19-
respective frameworks.
20-
21-
Which models are generated is controlled by the flags:
22-
--with-sinks
23-
--with-sources
24-
--with-summaries
25-
If none of these flags are specified, all models are generated.
26-
27-
Example invocations:
28-
$ GenerateFlowModel.py /tmp/dbs/apache_commons-codec_45649c8 "apache/Codec.qll"
29-
$ GenerateFlowModel.py /tmp/dbs/jdk15_db "javase/jdk_sinks.qll" --with-sinks
30-
31-
Requirements: `codeql` should both appear on your path.
32-
""")
33-
34-
35-
if any(s == "--help" for s in sys.argv):
36-
printHelp()
37-
sys.exit(0)
38-
39-
generateSinks = False
40-
generateSources = False
41-
generateSummaries = False
42-
if "--with-sinks" in sys.argv:
43-
sys.argv.remove("--with-sinks")
44-
generateSinks = True
45-
46-
if "--with-sources" in sys.argv:
47-
sys.argv.remove("--with-sources")
48-
generateSources = True
49-
50-
if "--with-summaries" in sys.argv:
51-
sys.argv.remove("--with-summaries")
52-
generateSummaries = True
53-
54-
if not generateSinks and not generateSources and not generateSummaries:
55-
generateSinks = generateSources = generateSummaries = True
56-
57-
if len(sys.argv) != 3:
58-
printHelp()
59-
sys.exit(1)
60-
61-
codeQlRoot = subprocess.check_output(
62-
["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
63-
targetQll = sys.argv[2]
64-
if not targetQll.endswith(".qll"):
65-
targetQll += ".qll"
66-
filename = os.path.basename(targetQll)
67-
shortname = filename[:-4]
68-
generatedFrameworks = os.path.join(
69-
codeQlRoot, "java/ql/lib/semmle/code/java/frameworks/")
70-
frameworkTarget = os.path.join(generatedFrameworks, targetQll)
71-
72-
workDir = tempfile.mkdtemp()
73-
os.makedirs(generatedFrameworks, exist_ok=True)
74-
75-
76-
def runQuery(infoMessage, query):
77-
print("########## Querying " + infoMessage + "...")
78-
database = sys.argv[1]
79-
queryFile = os.path.join(os.path.dirname(
80-
__file__), query)
81-
resultBqrs = os.path.join(workDir, "out.bqrs")
82-
cmd = ['codeql', 'query', 'run', queryFile, '--database',
83-
database, '--output', resultBqrs, '--threads', '8']
84-
85-
ret = subprocess.call(cmd)
86-
if ret != 0:
87-
print("Failed to generate " + infoMessage +
88-
". Failed command was: " + shlex.join(cmd))
89-
sys.exit(1)
90-
return readRows(resultBqrs)
91-
92-
93-
def readRows(bqrsFile):
94-
generatedJson = os.path.join(workDir, "out.json")
95-
cmd = ['codeql', 'bqrs', 'decode', bqrsFile,
96-
'--format=json', '--output', generatedJson]
97-
ret = subprocess.call(cmd)
98-
if ret != 0:
99-
print("Failed to decode BQRS. Failed command was: " + shlex.join(cmd))
100-
sys.exit(1)
101-
102-
with open(generatedJson) as f:
103-
results = json.load(f)
104-
105-
try:
106-
results['#select']['tuples']
107-
except KeyError:
108-
print('Unexpected JSON output - no tuples found')
109-
exit(1)
110-
111-
rows = ""
112-
for (row) in results['#select']['tuples']:
113-
rows += " \"" + row[0] + "\",\n"
114-
115-
return rows[:-2]
116-
117-
118-
def asCsvModel(superclass, kind, rows):
119-
classTemplate = """
120-
private class {0}{1}Csv extends {2} {{
121-
override predicate row(string row) {{
122-
row =
123-
[
124-
{3}
125-
]
126-
}}
127-
}}
128-
"""
129-
if rows.strip() == "":
130-
return ""
131-
return classTemplate.format(shortname[0].upper() + shortname[1:], kind.capitalize(), superclass, rows)
132-
133-
134-
if generateSummaries:
135-
summaryRows = runQuery("summary models", "CaptureSummaryModels.ql")
136-
summaryCsv = asCsvModel("SummaryModelCsv", "summary", summaryRows)
137-
else:
138-
summaryCsv = ""
139-
140-
if generateSinks:
141-
sinkRows = runQuery("sink models", "CaptureSinkModels.ql")
142-
sinkCsv = asCsvModel("SinkModelCsv", "sinks", sinkRows)
143-
else:
144-
sinkCsv = ""
145-
146-
if generateSources:
147-
sourceRows = runQuery("source models", "CaptureSourceModels.ql")
148-
sourceCsv = asCsvModel("SourceModelCsv", "sources", sourceRows)
149-
else:
150-
sourceCsv = ""
151-
152-
qllTemplate = """
153-
/** Definitions of taint steps in the {0} framework */
154-
155-
import java
156-
private import semmle.code.java.dataflow.ExternalFlow
157-
158-
{1}
159-
{2}
160-
{3}
161-
162-
"""
163-
164-
165-
qllContents = qllTemplate.format(shortname, sinkCsv, sourceCsv, summaryCsv)
166-
1676

168-
with open(frameworkTarget, "w") as frameworkQll:
169-
frameworkQll.write(qllContents)
7+
# Add Model as Data script directory to sys.path.
8+
gitroot = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
9+
madpath = os.path.join(gitroot, "misc/scripts/models-as-data/")
10+
sys.path.append(madpath)
17011

171-
cmd = ['codeql', 'query', 'format', '--in-place', frameworkTarget]
172-
ret = subprocess.call(cmd)
173-
if ret != 0:
174-
print("Failed to format query. Failed command was: " + shlex.join(cmd))
175-
sys.exit(1)
12+
import generate_flow_model as model
17613

177-
print("")
178-
print("CSV model written to " + frameworkTarget)
14+
language = "java"
15+
model.Generator.make(language).run()
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/usr/bin/python3
2+
3+
import json
4+
import os
5+
import os.path
6+
import shlex
7+
import subprocess
8+
import sys
9+
import tempfile
10+
11+
class Generator:
    """Drive the CodeQL model-generator queries and write the result as a QLL file.

    An instance is normally built from the command line with `Generator.make`
    and then executed with `run`. The generated file is placed below
    `<git root>/<language>/ql/lib/semmle/code/<language>/frameworks/`.
    """

    # Command line flags mapped to the boolean attribute each one enables.
    _FLAGS = {
        "--with-sinks": "generateSinks",
        "--with-sources": "generateSources",
        "--with-summaries": "generateSummaries",
        "--dry-run": "dryRun",
    }

    def __init__(self, language):
        """Create a generator for `language` with every option switched off."""
        self.language = language
        self.generateSinks = False
        self.generateSources = False
        self.generateSummaries = False
        self.dryRun = False

    def printHelp(self):
        """Print the command line usage text to stdout."""
        print(f"""Usage:
python3 GenerateFlowModel.py <library-database> <outputQll> [--with-sinks] [--with-sources] [--with-summaries] [--dry-run]

This generates summary, source and sink models for the code in the database.
The files will be placed in `{self.language}/ql/lib/semmle/code/{self.language}/frameworks/<outputQll>` where
outputQll is the name (and path) of the output QLL file. Usually, models are grouped by their
respective frameworks.

Which models are generated is controlled by the flags:
    --with-sinks
    --with-sources
    --with-summaries
If none of these flags are specified, all models are generated.

    --dry-run: Only run the queries, but don't write to file.

Example invocations:
$ python3 GenerateFlowModel.py /tmp/dbs/my_library_db "mylibrary/Framework.qll"
$ python3 GenerateFlowModel.py /tmp/dbs/my_library_db "mylibrary/FrameworkSinks.qll" --with-sinks

Requirements: `codeql` and `git` should both appear on your path.
""")

    def setenvironment(self, target, database):
        """Resolve all paths used by the generator.

        Args:
            target: Name (and optional relative path) of the output QLL file;
                a `.qll` suffix is appended when missing.
            database: Path of the CodeQL database to query.

        Raises:
            subprocess.CalledProcessError: If `git rev-parse` fails, e.g. when
                not run from inside a git checkout.
        """
        self.codeQlRoot = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
        if not target.endswith(".qll"):
            target += ".qll"
        self.filename = os.path.basename(target)
        self.shortname = self.filename[:-4]
        self.database = database
        self.generatedFrameworks = os.path.join(
            self.codeQlRoot, f"{self.language}/ql/lib/semmle/code/{self.language}/frameworks/")
        self.frameworkTarget = os.path.join(self.generatedFrameworks, target)

        # Keep the TemporaryDirectory object alive on the instance: the work
        # directory then exists for as long as the generator does and is
        # removed on finalization / interpreter exit instead of leaking.
        self._tempDir = tempfile.TemporaryDirectory()
        self.workDir = self._tempDir.name
        os.makedirs(self.generatedFrameworks, exist_ok=True)

    @staticmethod
    def make(language):
        """Build a configured generator from `sys.argv`.

        Recognized flags are removed from `sys.argv`. `--help` prints the
        usage and exits with status 0; a wrong number of positional arguments
        prints the usage and exits with status 1.

        Args:
            language: Language directory name, e.g. "java".

        Returns:
            A `Generator` whose environment has been set up.
        """
        generator = Generator(language)
        if "--help" in sys.argv:
            generator.printHelp()
            sys.exit(0)

        for flag, attribute in Generator._FLAGS.items():
            if flag in sys.argv:
                sys.argv.remove(flag)
                setattr(generator, attribute, True)

        # No explicit selection means "generate everything".
        if not (generator.generateSinks or generator.generateSources or generator.generateSummaries):
            generator.generateSinks = generator.generateSources = generator.generateSummaries = True

        if len(sys.argv) != 3:
            generator.printHelp()
            sys.exit(1)

        generator.setenvironment(sys.argv[2], sys.argv[1])
        return generator

    def runQuery(self, infoMessage, query):
        """Run one model-generator query and return its rows.

        Args:
            infoMessage: Human readable description used in progress and
                error messages.
            query: File name of the query inside
                `<language>/ql/src/utils/model-generator`.

        Returns:
            The rows formatted by `readRows`. Exits with status 1 when the
            `codeql` invocation fails.
        """
        print("########## Querying " + infoMessage + "...")
        queryFile = os.path.join(
            self.codeQlRoot, f"{self.language}/ql/src/utils/model-generator", query)
        resultBqrs = os.path.join(self.workDir, "out.bqrs")
        cmd = ['codeql', 'query', 'run', queryFile, '--database',
               self.database, '--output', resultBqrs, '--threads', '8']

        ret = subprocess.call(cmd)
        if ret != 0:
            print("Failed to generate " + infoMessage +
                  ". Failed command was: " + shlex.join(cmd))
            sys.exit(1)
        return self.readRows(resultBqrs)

    def readRows(self, bqrsFile):
        """Decode a BQRS file and format the first column of every tuple.

        Args:
            bqrsFile: Path of the BQRS file to decode.

        Returns:
            A string with one quoted row per line, separated by `,\\n`, or the
            empty string when there are no tuples. Exits with status 1 when
            decoding fails or the JSON has no `#select`/`tuples` entry.
        """
        generatedJson = os.path.join(self.workDir, "out.json")
        cmd = ['codeql', 'bqrs', 'decode', bqrsFile,
               '--format=json', '--output', generatedJson]
        ret = subprocess.call(cmd)
        if ret != 0:
            print("Failed to decode BQRS. Failed command was: " + shlex.join(cmd))
            sys.exit(1)

        with open(generatedJson, encoding="utf-8") as f:
            results = json.load(f)

        try:
            tuples = results['#select']['tuples']
        except KeyError:
            print('Unexpected JSON output - no tuples found')
            # sys.exit rather than the site-provided `exit` builtin, matching
            # every other failure path in this class.
            sys.exit(1)

        return ",\n".join(" \"" + row[0] + "\"" for row in tuples)

    def asCsvModel(self, superclass, kind, rows):
        """Wrap `rows` in a private QL class extending `superclass`.

        Args:
            superclass: Name of the QL class to extend.
            kind: Model kind; capitalized and used in the class name.
            rows: Pre-formatted rows as returned by `readRows`.

        Returns:
            The QL class source, or the empty string when `rows` is blank.
        """
        classTemplate = """
private class {0}{1}Csv extends {2} {{
  override predicate row(string row) {{
    row =
      [
{3}
      ]
  }}
}}
"""
        if rows.strip() == "":
            return ""
        # Slicing (instead of indexing) keeps an empty short name from
        # raising IndexError.
        className = self.shortname[:1].upper() + self.shortname[1:]
        return classTemplate.format(className, kind.capitalize(), superclass, rows)

    def makeContent(self):
        """Run the selected queries and return the complete QLL file content."""
        if self.generateSummaries:
            summaryRows = self.runQuery("summary models", "CaptureSummaryModels.ql")
            summaryCsv = self.asCsvModel("SummaryModelCsv", "summary", summaryRows)
        else:
            summaryCsv = ""

        if self.generateSinks:
            sinkRows = self.runQuery("sink models", "CaptureSinkModels.ql")
            sinkCsv = self.asCsvModel("SinkModelCsv", "sinks", sinkRows)
        else:
            sinkCsv = ""

        if self.generateSources:
            sourceRows = self.runQuery("source models", "CaptureSourceModels.ql")
            sourceCsv = self.asCsvModel("SourceModelCsv", "sources", sourceRows)
        else:
            sourceCsv = ""

        return f"""
/** Definitions of taint steps in the {self.shortname} framework */

import {self.language}
private import semmle.code.{self.language}.dataflow.ExternalFlow

{sinkCsv}
{sourceCsv}
{summaryCsv}

"""

    def save(self, content):
        """Write `content` to the target QLL file and auto-format it.

        Exits with status 1 when `codeql query format` fails.
        """
        # The target may live in a sub-directory of the frameworks folder
        # (e.g. "mylibrary/Framework.qll") that does not exist yet.
        targetDir = os.path.dirname(self.frameworkTarget)
        if targetDir:
            os.makedirs(targetDir, exist_ok=True)

        with open(self.frameworkTarget, "w", encoding="utf-8") as frameworkQll:
            frameworkQll.write(content)

        cmd = ['codeql', 'query', 'format', '--in-place', self.frameworkTarget]
        ret = subprocess.call(cmd)
        if ret != 0:
            print("Failed to format query. Failed command was: " + shlex.join(cmd))
            sys.exit(1)

        print("")
        print("CSV model written to " + self.frameworkTarget)

    def run(self):
        """Generate the models and, unless this is a dry run, save them.

        A dry run still executes the queries but exits with status 0 before
        anything is written.
        """
        content = self.makeContent()

        if self.dryRun:
            print("CSV Models generated, but not written to file.")
            sys.exit(0)

        self.save(content)

0 commit comments

Comments
 (0)