Skip to content

Commit aa225fb

Browse files
zbleness authored and armintaenzertng committed
[issue-854] Remove some control characters from JSON SPDX
Signed-off-by: Zalan Blenessy <zalan.blenessy@volvocars.com>
1 parent 8dc336f commit aa225fb

File tree

3 files changed

+78
-2
lines changed

3 files changed

+78
-2
lines changed

src/spdx_tools/spdx/parser/json/json_parser.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,33 @@
33
# SPDX-License-Identifier: Apache-2.0
44
import json
55

6-
from beartype.typing import Dict
6+
from beartype.typing import Any, Dict
77

88
from spdx_tools.spdx.model import Document
99
from spdx_tools.spdx.parser.jsonlikedict.json_like_dict_parser import JsonLikeDictParser
1010

11+
# Characters we don't want to see in SBOMs.
# This is a ``str.translate`` table: a code point mapped to ``None`` is deleted.
CONTROL_CHARS_MAP = {
    8: None,  # ASCII/UTF-8: backspace
    12: None,  # ASCII/UTF-8: formfeed
}


def remove_control_chars_from_value(value: Any) -> Any:
    """Remove the characters listed in ``CONTROL_CHARS_MAP`` from ``value``.

    Args:
        value: A string, a list (possibly nested), or any other object.

    Returns:
        For a string, a new string with the mapped characters deleted.
        For a list, the *same* list object, with every element cleaned
        recursively in place. Any other value (numbers, ``None``, dicts, ...)
        is returned unchanged.
    """
    if isinstance(value, str):
        return value.translate(CONTROL_CHARS_MAP)
    if isinstance(value, list):
        # Assigning by index keeps the list's identity and length, so it is
        # safe to do while enumerating.
        for index, item in enumerate(value):
            value[index] = remove_control_chars_from_value(item)
    return value
25+
26+
27+
def remove_json_control_chars_hook(pairs: list) -> dict:
    """Build a dict from decoded JSON key/value pairs, cleaning every value.

    Intended for use as the ``object_pairs_hook`` of ``json.load``. Each value
    is passed through ``remove_control_chars_from_value``; keys are kept as
    they are.
    """
    cleaned = {}
    for key, raw_value in pairs:
        cleaned[key] = remove_control_chars_from_value(raw_value)
    return cleaned
29+
1130

1231
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """Parse the JSON file at ``file_name`` into a ``Document``.

    The file is opened with ``encoding`` and decoded by ``json.load`` using
    ``remove_json_control_chars_hook`` as the ``object_pairs_hook``. The
    resulting dict is then handed to ``JsonLikeDictParser().parse``.
    """
    with open(file_name, encoding=encoding) as json_file:
        document_dict: Dict = json.load(json_file, object_pairs_hook=remove_json_control_chars_hook)

    return JsonLikeDictParser().parse(document_dict)
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"spdxVersion": "SPDX-2.2",
3+
"dataLicense": "CC0-1.0",
4+
"SPDXID": "SPDXRef-DOCUMENT",
5+
"creationInfo": {
6+
"created": "2020-11-24T01:12:27Z",
7+
"creators": ["Person: Nisha \b\f K (nishak@vmware.com)"]
8+
},
9+
"name": "golang-dist",
10+
"documentNamespace": "https://swinslow.net/spdx-examples/example7/golang-dist-492dfde4-318b-49f7-b48c-934bfafbde48",
11+
"documentDescribes": ["SPDXRef-golang-dist"],
12+
"packages": [
13+
{
14+
"name": "go1.16.4.linux-amd64",
15+
"SPDXID": "SPDXRef-golang-dist",
16+
"downloadLocation": "https://golang.org/dl/go1.16.4.linux-amd64.tar.gz",
17+
"versionInfo": "1.16.4",
18+
"filesAnalyzed": false,
19+
"checksums": [
20+
{
21+
"algorithm": "SHA256",
22+
"checksumValue": "7154e88f5a8047aad4b80ebace58a059e36e7e2e4eb3b383127a28c711b4ff59"
23+
}
24+
],
25+
"licenseConcluded": "NOASSERTION",
26+
"licenseDeclared": "LicenseRef-Golang-BSD-plus-Patents",
27+
"copyrightText": "Copyright (c) 2009 The Go Authors. \b All rights reserved."
28+
},
29+
{
30+
"name": "go",
31+
"SPDXID": "SPDXRef-go-compiler",
32+
"downloadLocation": "https://golang.org/dl/go1.16.4.linux-amd64.tar.gz",
33+
"versionInfo": "1.16.4",
34+
"filesAnalyzed": false,
35+
"licenseConcluded": "NOASSERTION",
36+
"licenseDeclared": "NOASSERTION",
37+
"copyrightText": "NOASSERTION"
38+
}
39+
],
40+
"hasExtractedLicensingInfos": [
41+
{
42+
"licenseId": "LicenseRef-Golang-BSD-plus-Patents",
43+
"extractedText": "Golang BSD plus Patents \"\\\/\b\f\n\r\t"
44+
}
45+
]
46+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import os
2+
3+
from spdx_tools.spdx.parser.json import json_parser
4+
5+
6+
def test_parse_control_characters():
    """Parse the ControlCharacters fixture and check backspace/formfeed are gone."""
    fixture_path = os.path.join(os.path.dirname(__file__), "../../data/ControlCharacters.spdx.json")
    doc = json_parser.parse_from_file(fixture_path)

    # NOTE(review): the fixture's creator string has a space on each side of the
    # removed "\b\f", so the expected name may need two spaces — confirm
    # against the fixture and the actor parser.
    assert doc.creation_info.creators[0].name == "Nisha K"
    # Newline, carriage return and tab are expected to survive; only \b and \f go.
    assert doc.extracted_licensing_info[0].extracted_text == 'Golang BSD plus Patents "\\/\n\r\t'

0 commit comments

Comments
 (0)