Skip to content

Commit 4ab850d

Browse files
committed
Modularise the code
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent c986b93 commit 4ab850d

File tree

4 files changed, with 52 additions and 56 deletions.

scanpipe/filters.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -411,6 +411,7 @@ def filter(self, qs, value):
411411
("sha1", "sha1"),
412412
("dwarf_included_paths", "dwarf_included_paths"),
413413
("dwarf_compiled_paths", "dwarf_compiled_paths"),
414+
("file_paths", "file_paths"),
414415
)
415416

416417

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -195,7 +195,7 @@ def map_elf(self):
195195
Map DWARF paths using similarities of path suffixes.
196196
"""
197197
d2d.map_elfs(project=self.project, logger=self.log)
198-
198+
199199
@group("Go")
200200
def map_go(self):
201201
"""

scanpipe/pipes/d2d.py

Lines changed: 50 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
from collections import defaultdict
2525
from contextlib import suppress
2626
from dataclasses import dataclass
27-
from functools import partial
2827
from pathlib import Path
2928
from re import match as regex_match
3029

@@ -39,9 +38,9 @@
3938
from django.template.defaultfilters import pluralize
4039

4140
from commoncode.paths import common_prefix
42-
from go_inspector.plugin import collect_and_parse_symbols
4341
from elf_inspector.dwarf import get_dwarf_paths
4442
from extractcode import EXTRACT_SUFFIX
43+
from go_inspector.plugin import collect_and_parse_symbols
4544
from packagedcode.npm import NpmPackageJsonHandler
4645
from summarycode.classify import LEGAL_STARTS_ENDS
4746

@@ -1667,46 +1666,30 @@ def _match_purldb_resources_post_process(
16671666
return interesting_codebase_resources.count()
16681667

16691668

1670-
def _map_dwarf_path_resource(
1671-
to_resource,
1672-
from_resources,
1673-
from_resources_index,
1674-
logger=None,
1669+
def _map_paths_resource(
1670+
to_resource, from_resources, from_resources_index, map_types, logger=None
16751671
):
16761672
"""
1677-
Map DWARF dwarf_paths found in the ``to_resource`` extra_data to
1678-
dwarf_paths of the ``from_resources`` CodebaseResource queryset using the
1679-
precomputed ``from_resources_index`` path index.
1673+
Map paths found in the ``to_resource`` extra_data to paths of the ``from_resources``
1674+
CodebaseResource queryset using the precomputed ``from_resources_index`` path index.
16801675
"""
1681-
compiled_paths = to_resource.extra_data.get("compiled_paths") or []
1682-
included_paths = to_resource.extra_data.get("included_paths") or []
1683-
dwarf_paths_and_map_type = [
1684-
(compiled_paths, "dwarf_compiled_paths"),
1685-
(included_paths, "dwarf_included_paths"),
1686-
]
1687-
1688-
dpnm = to_resource.extra_data["dwarf_paths_not_mapped"] = []
16891676
relations = {}
16901677

1691-
for dwarf_paths, map_type in dwarf_paths_and_map_type:
1692-
for dwarf_path in dwarf_paths:
1678+
for map_type in map_types:
1679+
paths = to_resource.extra_data.get(map_type, [])
1680+
not_mapped_paths = to_resource.extra_data[f"{map_type}_not_mapped"] = []
16931681

1694-
match = pathmap.find_paths(dwarf_path, from_resources_index)
1682+
for path in paths:
1683+
match = pathmap.find_paths(path, from_resources_index)
16951684
if not match:
1696-
dpnm.append(dwarf_path)
1685+
not_mapped_paths.append(path)
16971686
continue
16981687

1699-
# short dwarf path matched more than once is treated as not mapped for now
17001688
matched_path_length = match.matched_path_length
1701-
17021689
if matched_path_length == 1 and len(match.resource_ids) != 1:
1703-
dpnm.append(dwarf_path)
1690+
not_mapped_paths.append(path)
17041691
continue
17051692

1706-
# Sort match by most similar to the From/ side dwarf_path e.g. if we match
1707-
# some/foo/bar/baz.c and this/other/foo/bar/baz.c and the From is
1708-
# that/foo/bar/baz.c, some/foo/bar/baz.c has the most segments
1709-
# matched wins, e.g., the shortest From/ path wins.
17101693
matched_from_resources = [
17111694
from_resources.get(id=rid) for rid in match.resource_ids
17121695
]
@@ -1715,13 +1698,10 @@ def _map_dwarf_path_resource(
17151698
)
17161699
winning_from_resource = matched_from_resources[0]
17171700

1718-
# Do not count the "to/" segment as it is not "matchable"
1719-
# always strip leading segment ("to" or from" first segment)
1720-
dwarf_path_length = len(dwarf_path.strip("/").split("/")) - 1
1721-
1701+
path_length = len(path.strip("/").split("/")) - 1
17221702
extra_data = {
1723-
"path_score": f"{matched_path_length}/{dwarf_path_length}",
1724-
"dwarf_path": dwarf_path,
1703+
"path_score": f"{matched_path_length}/{path_length}",
1704+
map_type: path,
17251705
}
17261706

17271707
rel_key = (winning_from_resource.path, to_resource.path, map_type)
@@ -1738,23 +1718,29 @@ def _map_dwarf_path_resource(
17381718
if relations:
17391719
rels = CodebaseRelation.objects.bulk_create(relations.values())
17401720
if logger:
1741-
logger(f"Created {len(rels)} mapping using DWARF for: {to_resource.path!r}")
1721+
logger(
1722+
f"Created {len(rels)} mappings using {', '.join(map_types).upper()} for: {to_resource.path!r}"
1723+
)
17421724
else:
1743-
if logger:
1744-
logger(f"No mapping using DWARF for: {to_resource.path!r}")
1745-
1746-
if dpnm:
1747-
# save the "dwarf dwarf_paths not mapped"
1748-
to_resource.save()
17491725
if logger:
17501726
logger(
1751-
f"WARNING: DWARF paths NOT mapped for: {to_resource.path!r}: "
1752-
+ ", ".join(map(repr, dpnm))
1727+
f"No mappings using {', '.join(map_types).upper()} for: {to_resource.path!r}"
17531728
)
17541729

1730+
for map_type in map_types:
1731+
if to_resource.extra_data[f"{map_type}_not_mapped"]:
1732+
to_resource.save()
1733+
if logger:
1734+
logger(
1735+
f"WARNING: {map_type.upper()} paths NOT mapped for: {to_resource.path!r}: "
1736+
+ ", ".join(
1737+
map(repr, to_resource.extra_data[f"{map_type}_not_mapped"])
1738+
)
1739+
)
1740+
17551741

1756-
def map_paths(project, file_type, collect_paths_func, logger=None):
1757-
"""Map DWARF paths using similarities of path suffixes."""
1742+
def map_paths(project, file_type, collect_paths_func, map_types, logger=None):
1743+
"""Map paths using similarities of path suffixes."""
17581744
project_files = getattr(project.codebaseresources, file_type)()
17591745
from_resources = project_files.from_codebase()
17601746
to_resources = project_files.to_codebase().has_no_relation()
@@ -1779,17 +1765,31 @@ def map_paths(project, file_type, collect_paths_func, logger=None):
17791765
resource_iterator = to_resources.iterator(chunk_size=2000)
17801766
progress = LoopProgress(resource_count, logger)
17811767
for to_resource in progress.iter(resource_iterator):
1782-
_map_dwarf_path_resource(
1768+
_map_paths_resource(
17831769
to_resource,
17841770
from_resources,
17851771
from_resources_index,
1772+
map_types=map_types,
17861773
logger=logger,
17871774
)
17881775

1776+
17891777
def map_elfs(project, logger=None):
1790-
map_paths(project, "elfs", get_dwarf_paths, logger)
1778+
map_paths(
1779+
project,
1780+
"elfs",
1781+
get_dwarf_paths,
1782+
["dwarf_compiled_paths", "dwarf_included_paths"],
1783+
logger,
1784+
)
1785+
1786+
1787+
def get_go_file_paths(location):
1788+
go_symbols = (
1789+
collect_and_parse_symbols(location, check_type=False).get("go_symbols") or {}
1790+
)
1791+
return {"file_paths": go_symbols.get("file_paths") or []}
17911792

17921793

17931794
def map_go_paths(project, logger=None):
1794-
collect_and_parse_symbols_partial = partial(collect_and_parse_symbols, check_type=False)
1795-
map_paths(project, "executable_binaries", collect_and_parse_symbols_partial, logger)
1795+
map_paths(project, "executable_binaries", get_go_file_paths, ["file_paths"], logger)

scanpipe/pipes/elf.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,6 @@
2222

2323
from elf_inspector.dwarf import get_dwarf_paths
2424

25-
from scanpipe import pipes
26-
from scanpipe.models import CodebaseRelation
27-
from scanpipe.pipes import LoopProgress
28-
from scanpipe.pipes import pathmap
29-
3025

3126
def collect_dwarf_source_path_references(resource):
3227
"""Collect and store the DWARF debug paths of the provided ELF ``resource``."""

0 commit comments

Comments
 (0)