Skip to content

Commit 6d291a0

Browse files
committed
Support extraction of JavaScript map files
fixes aboutcode-org/scancode-toolkit#3637 Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 37197c2 commit 6d291a0

File tree

3 files changed

+104
-0
lines changed

3 files changed

+104
-0
lines changed

src/extractcode/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
file_system = 5
4343
patches = 6
4444
special_package = 7
45+
sources_map = 8
4546

4647
kind_labels = {
4748
1: 'docs',
@@ -51,6 +52,7 @@
5152
5: 'file_system',
5253
6: 'patches',
5354
7: 'special_package',
55+
8: 'sources_map',
5456
}
5557

5658
# note: we do not include special_package in all_kinds by default
@@ -62,6 +64,7 @@
6264
docs,
6365
patches,
6466
special_package,
67+
sources_map
6568
)
6669

6770
default_kinds = (
@@ -79,6 +82,7 @@
7982
'doc': (docs,),
8083
'patch': (patches,),
8184
'special_package': (special_package,),
85+
'sources_map': (sources_map,),
8286
}
8387

8488

src/extractcode/archive.py

+22
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
from extractcode import regular_nested
2525
from extractcode import file_system
2626
from extractcode import patches
27+
from extractcode import sources_map
2728
from extractcode import special_package
2829

2930
from extractcode import libarchive2
3031
from extractcode import patch
32+
from extractcode import source_map
3133
from extractcode import sevenzip
3234
from extractcode import vmimage
3335

@@ -506,6 +508,7 @@ def try_to_extract(location, target_dir, extractor):
506508
extract_ishield = sevenzip.extract
507509
extract_Z = sevenzip.extract
508510
extract_xarpkg = sevenzip.extract
511+
extract_source_map = source_map.extract
509512

510513
# Archive handlers.
511514
####################
@@ -1133,6 +1136,24 @@ def try_to_extract(location, target_dir, extractor):
11331136
strict=True
11341137
)
11351138

1139+
# Handler that treats source map files as archives: it is declared for JSON
# data carrying one of the listed ``*.map`` extensions and delegates the
# extraction to ``extract_source_map``.
SourceMapFileHandler = Handler(
    name='Source Map File',
    kind=sources_map,
    strict=True,
    filetypes=('json data',),
    mimetypes=('application/json',),
    extensions=('.js.map', '.ts.map', '.css.map', '.less.map', '.scss.map', '.soy.map', '.jsx.map'),
    extractors=[extract_source_map],
)
1156+
11361157
# Actual list of handlers
11371158

11381159
archive_handlers = [
@@ -1194,6 +1215,7 @@ def try_to_extract(location, target_dir, extractor):
11941215
QCOWHandler,
11951216
VMDKHandler,
11961217
VirtualBoxHandler,
1218+
SourceMapFileHandler,
11971219
]
11981220

11991221
# only support extracting patches if patch is installed. This is not a default

src/extractcode/source_map.py

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# ScanCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/extractcode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import os.path
12+
import posixpath
13+
14+
from commoncode import fileutils
15+
from commoncode import paths
16+
17+
import extractcode
18+
19+
"""
20+
Utilities to parse source map files and treat them as if they were
21+
archives containing files.
22+
"""
23+
24+
25+
def extract(location, target_dir):
26+
"""
27+
Extract each source in sourcesContent list of a map file at `location` as
28+
files in a target_dir directory tree mimicking the directory in which the
29+
sources would be present.
30+
31+
Return a list of warning messages. Raise Exception errors.
32+
"""
33+
for path, content in extract_source_content_from_map(location):
34+
# Convert path to safe posix path
35+
map_subfile_path = paths.safe_path(path, preserve_spaces=True)
36+
37+
# Create directories
38+
parent_dir = posixpath.dirname(map_subfile_path)
39+
parent_target_dir = os.path.join(target_dir, parent_dir)
40+
fileutils.create_dir(parent_target_dir)
41+
42+
subfile_path = os.path.join(target_dir, map_subfile_path)
43+
with open(subfile_path, "w") as subfile:
44+
subfile.write(content)
45+
46+
return []
47+
48+
49+
def extract_source_content_from_map(location):
50+
"""
51+
Return a list of tuples of (source, content)
52+
for each source in sourcesContent of a map file at location.
53+
54+
Raise an exception if the file is not a JSON file or cannot be parsed.
55+
"""
56+
try:
57+
with open(location, "r") as map_file:
58+
map_data = json.load(map_file)
59+
except json.JSONDecodeError as e:
60+
msg = f"Unable to decode map file:{location} {e}"
61+
raise extractcode.ExtractErrorFailedToExtract(msg)
62+
63+
if "sourcesContent" in map_data:
64+
sources_content = map_data["sourcesContent"]
65+
sources = map_data.get("sources", [])
66+
67+
# Inconsistent source map. In a valid source map, each entry in the ``sources``
68+
# list should have a corresponding entry in the ``sourcesContent`` list.
69+
# Use dummy filenames as `source` path in such scenario.
70+
if len(sources) != len(sources_content):
71+
sources = [
72+
f"source_content{i + 1}.txt" for i in range(len(sources_content))
73+
]
74+
75+
sources_and_content = list(zip(sources, sources_content))
76+
return sources_and_content
77+
78+
return []

0 commit comments

Comments
 (0)