Skip to content

Support extraction of JavaScript mapfile #58

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/extractcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
file_system = 5
patches = 6
special_package = 7
sources_map = 8

kind_labels = {
1: 'docs',
Expand All @@ -51,6 +52,7 @@
5: 'file_system',
6: 'patches',
7: 'special_package',
8: 'sources_map',
}

# note: we do not include special_package in all_kinds by default
Expand All @@ -62,6 +64,7 @@
docs,
patches,
special_package,
sources_map
)

default_kinds = (
Expand All @@ -79,6 +82,7 @@
'doc': (docs,),
'patch': (patches,),
'special_package': (special_package,),
'sources_map': (sources_map,),
}


Expand Down
22 changes: 22 additions & 0 deletions src/extractcode/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
from extractcode import regular_nested
from extractcode import file_system
from extractcode import patches
from extractcode import sources_map
from extractcode import special_package

from extractcode import libarchive2
from extractcode import patch
from extractcode import source_map
from extractcode import sevenzip
from extractcode import vmimage

Expand Down Expand Up @@ -506,6 +508,7 @@ def try_to_extract(location, target_dir, extractor):
extract_ishield = sevenzip.extract
extract_Z = sevenzip.extract
extract_xarpkg = sevenzip.extract
extract_source_map = source_map.extract

# Archive handlers.
####################
Expand Down Expand Up @@ -1133,6 +1136,24 @@ def try_to_extract(location, target_dir, extractor):
strict=True
)

# Handler for source map files: JSON documents carrying a compound ".<lang>.map"
# extension. Extraction is delegated to ``extract_source_map`` and the handler
# is classified under the ``sources_map`` kind.
# NOTE(review): ``strict=True`` presumably requires both the file type/mime type
# and the extension to match before this handler is selected -- confirm against
# the ``Handler`` definition.
SourceMapFileHandler = Handler(
name='Source Map File',
filetypes=('json data',),
mimetypes=('application/json',),
extensions=(
'.js.map',
'.ts.map',
'.css.map',
'.less.map',
'.scss.map',
'.soy.map',
'.jsx.map',
),
kind=sources_map,
extractors=[extract_source_map],
strict=True,
)

# Actual list of handlers

archive_handlers = [
Expand Down Expand Up @@ -1194,6 +1215,7 @@ def try_to_extract(location, target_dir, extractor):
QCOWHandler,
VMDKHandler,
VirtualBoxHandler,
SourceMapFileHandler,
]

# only support extracting patches if patch is installed. This is not a default
Expand Down
78 changes: 78 additions & 0 deletions src/extractcode/source_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/extractcode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import os.path
import posixpath

from commoncode import fileutils
from commoncode import paths

import extractcode

"""
Utilities to parse source map files and treat them as if they were
archives containing files.
"""


def extract(location, target_dir):
    """
    Extract each source in the sourcesContent list of a map file at `location`
    as files in a `target_dir` directory tree mimicking the directory in which
    the sources would be present.

    Entries whose content is null (None once decoded from JSON) are skipped:
    the source map format allows a null entry when the original source is not
    embedded in the map, so there is nothing to write for them.

    Extracted files are always written as UTF-8 text, regardless of the
    locale of the running process.

    Return a list of warning messages. Raise Exception errors.
    """
    for path, content in extract_source_content_from_map(location):
        # No embedded content for this source: writing None would raise a
        # TypeError, so skip the entry instead of failing the whole extraction.
        if content is None:
            continue

        # Convert path to safe posix path
        map_subfile_path = paths.safe_path(path, preserve_spaces=True)

        # Create directories
        parent_dir = posixpath.dirname(map_subfile_path)
        parent_target_dir = os.path.join(target_dir, parent_dir)
        fileutils.create_dir(parent_target_dir)

        subfile_path = os.path.join(target_dir, map_subfile_path)
        # Use an explicit encoding: the platform default may not be able to
        # represent every character found in the embedded sources.
        with open(subfile_path, "w", encoding="utf-8") as subfile:
            subfile.write(content)

    return []


def extract_source_content_from_map(location):
    """
    Return a list of tuples of (source, content)
    for each source in sourcesContent of a map file at location.

    Return an empty list when the map file has no usable ``sourcesContent``
    list: the key is missing, is null, is not a list, or the top-level JSON
    value is not an object.

    When ``sources`` is missing or does not have the same number of entries as
    ``sourcesContent``, dummy ``source_content<N>.txt`` names are used instead.

    Raise an exception if the file is not a JSON file or cannot be parsed.
    """
    try:
        # JSON is UTF-8; "utf-8-sig" also accepts a leading byte order mark,
        # which json.load() would otherwise reject.
        with open(location, "r", encoding="utf-8-sig") as map_file:
            map_data = json.load(map_file)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        msg = f"Unable to decode map file:{location} {e}"
        raise extractcode.ExtractErrorFailedToExtract(msg) from e

    # A source map is a JSON object: any other top-level value has no sources.
    if not isinstance(map_data, dict):
        return []

    sources_content = map_data.get("sourcesContent")
    # Covers a missing key, an explicit null and any non-list value.
    if not isinstance(sources_content, list) or not sources_content:
        return []

    sources = map_data.get("sources")
    if not isinstance(sources, list):
        sources = []

    # Inconsistent source map. In a valid source map, each entry in the ``sources``
    # list should have a corresponding entry in the ``sourcesContent`` list.
    # Use dummy filenames as `source` path in such scenario.
    if len(sources) != len(sources_content):
        sources = [
            f"source_content{number}.txt"
            for number in range(1, len(sources_content) + 1)
        ]

    return list(zip(sources, sources_content))