Skip to content

Commit d5fd11a

Browse files
authored
appcheck-null-byte-fix Fix null byte issue with AppCheck parser. HTTP/2 headers included in the .details.Messages entry are now decoded as req/res pairs, and escaped to prevent null bytes from causing a crash when persisted to the database (#10804)
1 parent d417cd1 commit d5fd11a

File tree

4 files changed

+631
-19
lines changed

4 files changed

+631
-19
lines changed

dojo/tools/appcheck_web_application_scanner/engines/appcheck.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,18 @@ class AppCheckScanningEngineParser(BaseEngineParser):
1414
"""
1515
SCANNING_ENGINE = "NewAppCheckScannerMultiple"
1616

17-
REQUEST_RESPONSE_PATTERN = re.compile(r"^--->\n\n(.+)\n\n<---\n\n(.+)$", re.DOTALL)
17+
HTTP_1_REQUEST_RESPONSE_PATTERN = re.compile(r"^--->\n\n(.+)\n\n<---\n\n(.+)$", re.DOTALL)
18+
HTTP_2_REQUEST_RESPONSE_PATTERN = re.compile(
19+
r"^HTTP/2 Request Headers:\n\n(.+)\r\nHTTP/2 Response Headers:\n\n(.+)$", re.DOTALL)
1820

1921
def extract_request_response(self, finding: Finding, value: dict[str, [str]]) -> None:
20-
if rr_details := self.REQUEST_RESPONSE_PATTERN.findall(value.get("Messages") or ""):
21-
# Remove the 'Messages' entry since we've parsed it as a request/response pair; don't need to add it to the
22-
# Finding description
23-
value.pop("Messages")
24-
finding.unsaved_request, finding.unsaved_response = (d.strip() for d in rr_details[0])
22+
if messages := value.get("Messages"):
23+
# If we match either HTTP/1 or HTTP/2 request/response entries, remove the 'Messages' entry since we'll have
24+
# parsed it as a request/response pair; don't need to add it to the Finding description
25+
if rr_details := self.HTTP_1_REQUEST_RESPONSE_PATTERN.findall(messages)\
26+
or self.HTTP_2_REQUEST_RESPONSE_PATTERN.findall(messages):
27+
value.pop("Messages")
28+
finding.unsaved_request, finding.unsaved_response = (d.strip() for d in rr_details[0])
2529

2630
def parse_details(self, finding: Finding, value: dict[str, Union[str, dict[str, [str]]]]) -> None:
2731
self.extract_request_response(finding, value)

dojo/tools/appcheck_web_application_scanner/engines/base.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,21 @@ def strip_markup(value: str) -> str:
2525
return value
2626

2727

28+
def escape_non_printable(s: str) -> str:
29+
"""
30+
Replaces non-printable characters from a string, for some definition of non-printable that probably differs from the
31+
uncountable other available definitions of non-printable, with a more-printable version.
32+
"""
33+
def escape_if_needed(x):
34+
# Accept isprintable() stuff (which includes space) and common whitespaces that can be rendered
35+
if x.isprintable() or x in {"\r", "\n", "\t"}:
36+
return x
37+
# Anything else -- including other weird whitespaces -- use repr() to give the string representation; also
38+
# remove the surrounding single quotes
39+
return repr(x)[1:-1]
40+
return "".join([escape_if_needed(c) for c in s])
41+
42+
2843
#######
2944
# Field parsing helper classes
3045
#######
@@ -66,10 +81,10 @@ def check(self, engine_parser):
6681

6782
class DeMarkupedAttribute(Attribute):
6883
"""
69-
Class for an Attribute (as above) but whose value is stripped of markup prior to being set.
84+
Class for an Attribute (as above) but whose value is stripped of markup and non-printable chars prior to being set.
7085
"""
7186
def handle(self, engine_class, finding, value):
72-
super().handle(engine_class, finding, strip_markup(value))
87+
super().handle(engine_class, finding, escape_non_printable(strip_markup(value)))
7388

7489

7590
class Method(FieldType):
@@ -209,7 +224,7 @@ def parse_components(self, finding: Finding, value: [str]) -> None:
209224
# For parsing additional description-related entries (description, notes, and details)
210225
#####
211226
def format_additional_description(self, section: str, value: str) -> str:
212-
return f"**{section}**: {strip_markup(value)}"
227+
return f"**{section}**: {escape_non_printable(strip_markup(value))}"
213228

214229
def append_description(self, finding: Finding, addendum: dict[str, str]) -> None:
215230
if addendum:

unittests/scans/appcheck_web_application_scanner/appcheck_web_application_scanner_http2.json

Lines changed: 473 additions & 0 deletions
Large diffs are not rendered by default.

unittests/tools/test_appcheck_web_application_scanner_parser.py

Lines changed: 130 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22

33
from dojo.models import Finding, Test
44
from dojo.tools.appcheck_web_application_scanner.engines.appcheck import AppCheckScanningEngineParser
5-
from dojo.tools.appcheck_web_application_scanner.engines.base import BaseEngineParser, strip_markup
5+
from dojo.tools.appcheck_web_application_scanner.engines.base import (
6+
BaseEngineParser,
7+
escape_non_printable,
8+
strip_markup,
9+
)
610
from dojo.tools.appcheck_web_application_scanner.engines.nmap import NmapScanningEngineParser
711
from dojo.tools.appcheck_web_application_scanner.parser import AppCheckWebApplicationScannerParser
812

@@ -217,6 +221,71 @@ def test_appcheck_web_application_scanner_parser_dupes(self):
217221
# Test has 5 entries, but we should only return 3 findings.
218222
self.assertEqual(3, len(findings))
219223

224+
def test_appcheck_web_application_scanner_parser_http2(self):
225+
with open("unittests/scans/appcheck_web_application_scanner/appcheck_web_application_scanner_http2.json") as testfile:
226+
parser = AppCheckWebApplicationScannerParser()
227+
findings = parser.get_findings(testfile, Test())
228+
self.assertEqual(3, len(findings))
229+
230+
finding = findings[0]
231+
self.assertEqual("1c564bddf78f7642468474a49c9be6653f39e9df6b32d658", finding.unique_id_from_tool)
232+
self.assertEqual("2024-08-06", finding.date)
233+
self.assertEqual("HTTP/2 Supported", finding.title)
234+
self.assertEqual(1, len(finding.unsaved_endpoints))
235+
self.assertTrue("**Messages**" not in finding.description)
236+
self.assertTrue("\x00" not in finding.description)
237+
self.assertIsNotNone(finding.unsaved_request)
238+
self.assertTrue(finding.unsaved_request.startswith(":method = GET"))
239+
self.assertIsNotNone(finding.unsaved_response)
240+
self.assertTrue(finding.unsaved_response.startswith(":status: 200"))
241+
endpoint = finding.unsaved_endpoints[0]
242+
endpoint.clean()
243+
self.assertEqual("www.xzzvwy.com", endpoint.host)
244+
self.assertEqual(443, endpoint.port)
245+
self.assertEqual("https", endpoint.protocol)
246+
self.assertEqual("media/vzdldjmk/pingpong2.jpg", endpoint.path)
247+
self.assertEqual("rmode=max&height=500", endpoint.query)
248+
249+
finding = findings[1]
250+
self.assertEqual("4e7c0b570ff6083376b99e1897102a87907effe2199dc8d4", finding.unique_id_from_tool)
251+
self.assertEqual("2024-08-06", finding.date)
252+
self.assertEqual("HTTP/2 Protocol: Transfer-Encoding Header Accepted", finding.title)
253+
self.assertTrue("**Messages**" not in finding.description)
254+
self.assertTrue("\x00" not in finding.description)
255+
self.assertTrue("**HTTP2 Headers**" in finding.description)
256+
self.assertIsNotNone(finding.unsaved_request)
257+
self.assertTrue(finding.unsaved_request.startswith(":method = POST"))
258+
self.assertIsNotNone(finding.unsaved_response)
259+
self.assertTrue(finding.unsaved_response.startswith(":status: 200"))
260+
self.assertEqual(1, len(finding.unsaved_endpoints))
261+
endpoint = finding.unsaved_endpoints[0]
262+
endpoint.clean()
263+
self.assertEqual("www.xzzvwy.com", endpoint.host)
264+
self.assertEqual(443, endpoint.port)
265+
self.assertEqual("https", endpoint.protocol)
266+
self.assertEqual("media/mmzzvwy/pingpong2.jpg", endpoint.path)
267+
self.assertEqual("rmode=max&height=500", endpoint.query)
268+
269+
finding = findings[2]
270+
self.assertEqual("2f1fb384e6a866f9ee0c6f7550e3b607e8b1dd2b1ab0fd02", finding.unique_id_from_tool)
271+
self.assertEqual("2024-08-06", finding.date)
272+
self.assertEqual("HTTP/2 Protocol: Transfer-Encoding Header Accepted", finding.title)
273+
self.assertTrue("**Messages**" not in finding.description)
274+
self.assertTrue("**HTTP2 Headers**" in finding.description)
275+
self.assertTrue("\x00" not in finding.description)
276+
self.assertIsNotNone(finding.unsaved_request)
277+
self.assertTrue(finding.unsaved_request.startswith(":method = POST"))
278+
self.assertIsNotNone(finding.unsaved_response)
279+
self.assertTrue(finding.unsaved_response.startswith(":status: 200"))
280+
self.assertEqual(1, len(finding.unsaved_endpoints))
281+
endpoint = finding.unsaved_endpoints[0]
282+
endpoint.clean()
283+
self.assertEqual("www.zzvwy.com", endpoint.host)
284+
self.assertEqual(443, endpoint.port)
285+
self.assertEqual("https", endpoint.protocol)
286+
self.assertEqual("media/bnhfz2s2/transport-hubs.jpeg", endpoint.path)
287+
self.assertEqual("width=768&height=505&mode=crop&format=webp&quality=60", endpoint.query)
288+
220289
def test_appcheck_web_application_scanner_parser_base_engine_parser(self):
221290
engine = BaseEngineParser()
222291

@@ -411,6 +480,14 @@ def test_appcheck_web_application_scanner_parser_appcheck_engine_parser(self):
411480
{"Messages": "--->\n\nsome stuff\n\n<--\n\nhere"},
412481
# Incorrect request starting-marker
413482
{"Messages": "-->\n\nsome stuff here\n\n<---\n\nhere"},
483+
# Missing data
484+
{"Messages": "HTTP/2 Request Headers:\n\n\r\nHTTP/2 Response Headers:\n\n"},
485+
{"Messages": "HTTP/2 Request Headers:\n\n\r\nHTTP/2 Response Headers:\n\nData"},
486+
{"Messages": "HTTP/2 Request Headers:\n\nData\r\nHTTP/2 Response Headers:\n\n"},
487+
# No response
488+
{"Messages": "HTTP/2 Request Headers:\n\nData\r\n"},
489+
# No request
490+
{"Messages": "\r\nHTTP/2 Response Headers:\n\nData"},
414491
]:
415492
has_messages_entry = "Messages" in no_rr
416493
engine.extract_request_response(f, no_rr)
@@ -420,15 +497,28 @@ def test_appcheck_web_application_scanner_parser_appcheck_engine_parser(self):
420497
if has_messages_entry:
421498
self.assertTrue("Messages" in no_rr)
422499

423-
for req, res in [
424-
("some stuff", "here"), ("some stuff <---", " here"), ("s--->", "here<---"), (" s ", " h "),
425-
("some stuff... HERE\r\n\r\n", "no, here\n\n"),
426-
]:
427-
rr = {"Messages": f"--->\n\n{req}\n\n<---\n\n{res}"}
428-
engine.extract_request_response(f, rr)
429-
self.assertEqual(req.strip(), f.unsaved_request)
430-
self.assertEqual(res.strip(), f.unsaved_response)
431-
f.unsaved_request = f.unsaved_response = None
500+
for template, test_data in {
501+
# HTTP/1
502+
"--->\n\n{req}\n\n<---\n\n{res}": [
503+
("some stuff", "here"),
504+
("some stuff <---", " here"),
505+
("s--->", "here<---"),
506+
(" s ", " h "),
507+
("some stuff... HERE\r\n\r\n", "no, here\n\n"),
508+
],
509+
# HTTP/2
510+
"HTTP/2 Request Headers:\n\n{req}\r\nHTTP/2 Response Headers:\n\n{res}": [
511+
("some stuff", "here"),
512+
(" s---> ", " here<--- "),
513+
("\x00\x01\u0004\n\r\tdata", "\r\n\x00\x01\x0c\x0bdata"),
514+
],
515+
}.items():
516+
for req, res in test_data:
517+
rr = {"Messages": template.format(req=req, res=res)}
518+
engine.extract_request_response(f, rr)
519+
self.assertEqual(req.strip(), f.unsaved_request)
520+
self.assertEqual(res.strip(), f.unsaved_response)
521+
f.unsaved_request = f.unsaved_response = None
432522

433523
def test_appcheck_web_application_scanner_parser_markup_stripper(self):
434524
for markup, expected in [
@@ -440,3 +530,33 @@ def test_appcheck_web_application_scanner_parser_markup_stripper(self):
440530
("[[markup]] but with [[urlhere]]", "but with urlhere"),
441531
]:
442532
self.assertEqual(expected, strip_markup(markup))
533+
534+
def test_appcheck_web_application_scanner_parser_non_printable_escape(self):
535+
for test_string, expected in [
536+
("", ""),
537+
(
538+
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c",
539+
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\\x0b\\x0c",
540+
),
541+
("'!Test String?'\"\"", "'!Test String?'\"\""),
542+
("\r\n\tTest\r\nString\t\r\n", "\r\n\tTest\r\nString\t\r\n"),
543+
("\0Test\r\nString\0\n", "\\x00Test\r\nString\\x00\n"),
544+
("\0\0你好,\0我不知道。对马好!\n", "\\x00\\x00你好,\\x00我不知道。对马好!\n"),
545+
("\u0000", r"\x00"),
546+
("\x00", r"\x00"),
547+
("\u0000\u0000", r"\x00\x00"),
548+
("\r\n\t\t\u0000\u0000\n\n", "\r\n\t\t\\x00\\x00\n\n"),
549+
(
550+
"¡A qÙîçk ΛæzŸ ßrȯωñ Møøβe\nönce \u0000\u202d\u200e Σister's ÞΕ 🜯 ¼ 50¢ «soda¬¿ υϖυ 🤪\u000b…",
551+
"¡A qÙîçk ΛæzŸ ßrȯωñ Møøβe\nönce \\x00\\u202d\\u200e Σister's ÞΕ 🜯 ¼ 50¢ «soda¬¿ υϖυ 🤪\\x0b…",
552+
),
553+
(
554+
"Words: \u0000\u0010ABCD\u0000\u0001\u0001`\u0000jpeg\u0000CC+\u0000\b\u0000\u0003;\u0001\u0002\u00002\u001c\u0000@\u0000i\u0004\\\u0000. Done.",
555+
r"Words: \x00\x10ABCD\x00\x01\x01`\x00jpeg\x00CC+\x00\x08\x00\x03;\x01\x02\x002\x1c\x00@\x00i\x04\\x00. Done.",
556+
),
557+
(
558+
"\u0016\no#bota\u00124&7\r\u0019j9}\t\u0004ef\u202egh\u001c",
559+
"\\x16\no#bota\\x124&7\r\\x19j9}\t\\x04ef\\u202egh\\x1c",
560+
),
561+
]:
562+
self.assertEqual(expected, escape_non_printable(test_string))

0 commit comments

Comments
 (0)