Skip to content

Commit 52be212

Browse files
committed
Refactor the code
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 1c08d6f commit 52be212

File tree

2 files changed

+177
-48
lines changed

2 files changed

+177
-48
lines changed

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ def steps(cls):
7070
cls.map_java_to_class,
7171
cls.map_jar_to_source,
7272
cls.map_javascript,
73+
cls.map_elf,
74+
cls.map_go,
7375
cls.match_directories_to_purldb,
7476
cls.match_resources_to_purldb,
7577
cls.map_javascript_post_purldb_match,

scanpipe/pipes/d2d.py

Lines changed: 175 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,78 +1666,155 @@ def _match_purldb_resources_post_process(
16661666
return interesting_codebase_resources.count()
16671667

16681668

1669-
def map_paths_resource(
    to_resource, from_resources, from_resources_index, map_types, logger=None
):
    """
    Map paths found in the ``to_resource`` extra_data to paths of the ``from_resources``
    CodebaseResource queryset using the precomputed ``from_resources_index`` path index.

    For each entry of ``map_types``, the paths stored under that key in
    ``to_resource.extra_data`` are passed to ``process_relations`` together with
    a fresh ``<map_type>_not_mapped`` list stored back in the extra_data.
    The collected relations are then bulk-created, and ``to_resource`` is saved
    for every map type that ends up with a non-empty not-mapped list.

    Args:
        to_resource (CodebaseResource): The resource to map paths for.
        from_resources (QuerySet): The queryset of from resources.
        from_resources_index (dict): Index of from resources.
        map_types (list): List of types of mapping.
        logger (function, optional): Called with each message. When omitted
            or None, messages are silently dropped instead of raising.

    Returns
    -------
        None

    """

    def log(message):
        # ``logger`` defaults to None: calling it unguarded raised a TypeError.
        if logger:
            logger(message)

    relations = {}

    for map_type in map_types:
        paths = to_resource.extra_data.get(map_type, [])
        # Reset the not-mapped list on every run; the helper appends to it.
        not_mapped_paths = to_resource.extra_data[f"{map_type}_not_mapped"] = []
        process_relations(
            to_resource,
            from_resources,
            from_resources_index,
            relations,
            map_type,
            paths,
            not_mapped_paths,
        )

    map_types_label = ", ".join(map_types).upper()
    if relations:
        rels = CodebaseRelation.objects.bulk_create(relations.values())
        log(
            f"Created {len(rels)} mappings using "
            f"{map_types_label} for: {to_resource.path!r}"
        )
    else:
        log(
            f"No mappings using {map_types_label} for: "
            f"{to_resource.path!r}"
        )

    for map_type in map_types:
        not_mapped_paths = to_resource.extra_data.get(f"{map_type}_not_mapped")
        if not_mapped_paths:
            # Persist the not-mapped paths recorded in the extra_data.
            to_resource.save()
            log(
                f"WARNING: {map_type.upper()} paths NOT mapped for: "
                f"{to_resource.path!r}: "
                + ", ".join(map(repr, not_mapped_paths))
            )
17381723

17391724

1740-
def check_match(match, matched_path_length):
1725+
def process_relations(
    to_resource,
    from_resources,
    from_resources_index,
    relations,
    map_type,
    paths,
    not_mapped_paths,
):
    """
    Collect the relations between ``to_resource`` and the from resources
    matching each of the provided ``paths``.

    Each path is looked up in ``from_resources_index``. Paths without a match,
    or with a match rejected by ``is_invalid_match``, are appended to
    ``not_mapped_paths``. For the other paths, the first matched resource in
    ``sort_matched_from_resources`` order is selected and an unsaved
    CodebaseRelation is stored in ``relations``, unless one already exists for
    the same (from path, to path, map type) key.

    Args:
        to_resource (CodebaseResource): The resource to map paths for.
        from_resources (QuerySet): The queryset of from resources.
        from_resources_index (dict): Index of from resources.
        relations (dict): Dictionary to store relations, updated in place.
        map_type (str): Type of mapping.
        paths (list): List of paths to map.
        not_mapped_paths (list): List of not mapped paths, updated in place.

    Returns
    -------
        None

    """
    for candidate_path in paths:
        path_match = pathmap.find_paths(candidate_path, from_resources_index)

        # Short-circuit keeps the attribute access away from an empty match.
        if not path_match or is_invalid_match(
            path_match, path_match.matched_path_length
        ):
            not_mapped_paths.append(candidate_path)
            continue

        candidates = sort_matched_from_resources(
            [from_resources.get(id=rid) for rid in path_match.resource_ids]
        )
        winner = candidates[0]

        rel_key = (winner.path, to_resource.path, map_type)
        if rel_key in relations:
            continue

        # Number of "/" separators left once the surrounding ones are stripped,
        # that is the segments count minus one.
        path_length = candidate_path.strip("/").count("/")
        relations[rel_key] = CodebaseRelation(
            project=winner.project,
            from_resource=winner,
            to_resource=to_resource,
            map_type=map_type,
            extra_data={
                "path_score": f"{path_match.matched_path_length}/{path_length}",
                map_type: candidate_path,
            },
        )
1784+
1785+
1786+
def sort_matched_from_resources(matched_from_resources):
    """
    Return a new list of the matched from resources ordered by the number of
    "/"-separated segments of their path, then by the path itself.

    Args:
        matched_from_resources (list): List of matched CodebaseResource objects.

    Returns
    -------
        list: Sorted list of CodebaseResource objects.

    """

    def segments_then_path(resource):
        # Leading and trailing slashes do not count as segments.
        segments_count = resource.path.strip("/").count("/") + 1
        return segments_count, resource.path

    ordered = list(matched_from_resources)
    ordered.sort(key=segments_then_path)
    return ordered
1802+
1803+
1804+
def is_invalid_match(match, matched_path_length):
    """
    Return True when the match must be rejected: the matched path length is
    exactly 1 and the match does not reference exactly one resource id.

    Args:
        match (PathMatch): The path match object.
        matched_path_length (int): The length of the matched path.

    Returns
    -------
        bool: True if the match is invalid, False otherwise.

    """
    if matched_path_length != 1:
        return False

    return len(match.resource_ids) != 1
17421819

17431820

@@ -1746,10 +1823,14 @@ def map_paths(project, file_type, collect_paths_func, map_types, logger=None):
17461823
from_resources = project.codebaseresources.files().from_codebase()
17471824
to_resources = project.codebaseresources.files().to_codebase().has_no_relation()
17481825
to_resources = getattr(to_resources, file_type)()
1826+
resource_count = 0
17491827
for resource in to_resources:
1750-
paths = collect_paths_func(resource.location_path)
1751-
resource.update_extra_data(paths)
1752-
resource_count = to_resources.count()
1828+
try:
1829+
paths = collect_paths_func(resource.location_path)
1830+
resource.update_extra_data(paths)
1831+
resource_count += 1
1832+
except Exception as e:
1833+
logger(f"Can not parse {resource.location_path!r} {e!r}")
17531834

17541835
if logger:
17551836
logger(
@@ -1767,7 +1848,7 @@ def map_paths(project, file_type, collect_paths_func, map_types, logger=None):
17671848
resource_iterator = to_resources.iterator(chunk_size=2000)
17681849
progress = LoopProgress(resource_count, logger)
17691850
for to_resource in progress.iter(resource_iterator):
1770-
_map_paths_resource(
1851+
map_paths_resource(
17711852
to_resource,
17721853
from_resources,
17731854
from_resources_index,
@@ -1777,6 +1858,18 @@ def map_paths(project, file_type, collect_paths_func, map_types, logger=None):
17771858

17781859

17791860
def map_elfs(project, logger=None):
1861+
"""
1862+
Map ELF file paths in a project.
1863+
1864+
Args:
1865+
project (Project): The project to map ELF files for.
1866+
logger (function, optional): Log messages.
1867+
1868+
Returns
1869+
-------
1870+
None
1871+
1872+
"""
17801873
map_paths(
17811874
project=project,
17821875
file_type="elfs",
@@ -1787,6 +1880,17 @@ def map_elfs(project, logger=None):
17871880

17881881

17891882
def get_elf_file_dwarf_paths(location):
1883+
"""
1884+
Retrieve dwarf paths for ELF files.
1885+
1886+
Args:
1887+
location (str): The location of the ELF file.
1888+
1889+
Returns
1890+
-------
1891+
dict: Dictionary containing dwarf paths.
1892+
1893+
"""
17901894
paths = get_dwarf_paths(location)
17911895
return {
17921896
"dwarf_compiled_paths": paths.get("compiled_paths") or [],
@@ -1795,13 +1899,36 @@ def get_elf_file_dwarf_paths(location):
17951899

17961900

17971901
def get_go_file_paths(location):
    """
    Return the file paths listed in the Go symbols collected from the file
    at ``location``.

    Args:
        location (str): The location of the Go file.

    Returns
    -------
        dict: A single "go_file_paths" key holding the collected file paths,
        or an empty list when no Go symbols or file paths are available.

    """
    symbols = collect_and_parse_symbols(location, check_type=False)
    go_symbols = symbols.get("go_symbols") or {}
    file_paths = go_symbols.get("file_paths") or []
    return {"go_file_paths": file_paths}
18021917

18031918

18041919
def map_go_paths(project, logger=None):
1920+
"""
1921+
Map Go file paths in a project.
1922+
1923+
Args:
1924+
project (Project): The project to map Go files for.
1925+
logger (function, optional): Log messages.
1926+
1927+
Returns
1928+
-------
1929+
None
1930+
1931+
"""
18051932
map_paths(
18061933
project=project,
18071934
file_type="executable_binaries",

0 commit comments

Comments
 (0)