24
24
from collections import defaultdict
25
25
from contextlib import suppress
26
26
from dataclasses import dataclass
27
- from functools import partial
28
27
from pathlib import Path
29
28
from re import match as regex_match
30
29
39
38
from django .template .defaultfilters import pluralize
40
39
41
40
from commoncode .paths import common_prefix
42
- from go_inspector .plugin import collect_and_parse_symbols
43
41
from elf_inspector .dwarf import get_dwarf_paths
44
42
from extractcode import EXTRACT_SUFFIX
43
+ from go_inspector .plugin import collect_and_parse_symbols
45
44
from packagedcode .npm import NpmPackageJsonHandler
46
45
from summarycode .classify import LEGAL_STARTS_ENDS
47
46
@@ -1667,46 +1666,30 @@ def _match_purldb_resources_post_process(
1667
1666
return interesting_codebase_resources .count ()
1668
1667
1669
1668
1670
- def _map_dwarf_path_resource (
1671
- to_resource ,
1672
- from_resources ,
1673
- from_resources_index ,
1674
- logger = None ,
1669
+ def _map_paths_resource (
1670
+ to_resource , from_resources , from_resources_index , map_types , logger = None
1675
1671
):
1676
1672
"""
1677
- Map DWARF dwarf_paths found in the ``to_resource`` extra_data to
1678
- dwarf_paths of the ``from_resources`` CodebaseResource queryset using the
1679
- precomputed ``from_resources_index`` path index.
1673
+ Map paths found in the ``to_resource`` extra_data to paths of the ``from_resources``
1674
+ CodebaseResource queryset using the precomputed ``from_resources_index`` path index.
1680
1675
"""
1681
- compiled_paths = to_resource .extra_data .get ("compiled_paths" ) or []
1682
- included_paths = to_resource .extra_data .get ("included_paths" ) or []
1683
- dwarf_paths_and_map_type = [
1684
- (compiled_paths , "dwarf_compiled_paths" ),
1685
- (included_paths , "dwarf_included_paths" ),
1686
- ]
1687
-
1688
- dpnm = to_resource .extra_data ["dwarf_paths_not_mapped" ] = []
1689
1676
relations = {}
1690
1677
1691
- for dwarf_paths , map_type in dwarf_paths_and_map_type :
1692
- for dwarf_path in dwarf_paths :
1678
+ for map_type in map_types :
1679
+ paths = to_resource .extra_data .get (map_type , [])
1680
+ not_mapped_paths = to_resource .extra_data [f"{ map_type } _not_mapped" ] = []
1693
1681
1694
- match = pathmap .find_paths (dwarf_path , from_resources_index )
1682
+ for path in paths :
1683
+ match = pathmap .find_paths (path , from_resources_index )
1695
1684
if not match :
1696
- dpnm .append (dwarf_path )
1685
+ not_mapped_paths .append (path )
1697
1686
continue
1698
1687
1699
- # short dwarf path matched more than once is treated as not mapped for now
1700
1688
matched_path_length = match .matched_path_length
1701
-
1702
1689
if matched_path_length == 1 and len (match .resource_ids ) != 1 :
1703
- dpnm .append (dwarf_path )
1690
+ not_mapped_paths .append (path )
1704
1691
continue
1705
1692
1706
- # Sort match by most similar to the From/ side dwarf_path e.g. if we match
1707
- # some/foo/bar/baz.c and this/other/foo/bar/baz.c and the From is
1708
- # that/foo/bar/baz.c, some/foo/bar/baz.c has the most segments
1709
- # matched wins, e.g., the shortest From/ path wins.
1710
1693
matched_from_resources = [
1711
1694
from_resources .get (id = rid ) for rid in match .resource_ids
1712
1695
]
@@ -1715,13 +1698,10 @@ def _map_dwarf_path_resource(
1715
1698
)
1716
1699
winning_from_resource = matched_from_resources [0 ]
1717
1700
1718
- # Do not count the "to/" segment as it is not "matchable"
1719
- # always strip leading segment ("to" or from" first segment)
1720
- dwarf_path_length = len (dwarf_path .strip ("/" ).split ("/" )) - 1
1721
-
1701
+ path_length = len (path .strip ("/" ).split ("/" )) - 1
1722
1702
extra_data = {
1723
- "path_score" : f"{ matched_path_length } /{ dwarf_path_length } " ,
1724
- "dwarf_path" : dwarf_path ,
1703
+ "path_score" : f"{ matched_path_length } /{ path_length } " ,
1704
+ map_type : path ,
1725
1705
}
1726
1706
1727
1707
rel_key = (winning_from_resource .path , to_resource .path , map_type )
@@ -1738,23 +1718,29 @@ def _map_dwarf_path_resource(
1738
1718
if relations :
1739
1719
rels = CodebaseRelation .objects .bulk_create (relations .values ())
1740
1720
if logger :
1741
- logger (f"Created { len (rels )} mapping using DWARF for: { to_resource .path !r} " )
1721
+ logger (
1722
+ f"Created { len (rels )} mappings using { ', ' .join (map_types ).upper ()} for: { to_resource .path !r} "
1723
+ )
1742
1724
else :
1743
- if logger :
1744
- logger (f"No mapping using DWARF for: { to_resource .path !r} " )
1745
-
1746
- if dpnm :
1747
- # save the "dwarf dwarf_paths not mapped"
1748
- to_resource .save ()
1749
1725
if logger :
1750
1726
logger (
1751
- f"WARNING: DWARF paths NOT mapped for: { to_resource .path !r} : "
1752
- + ", " .join (map (repr , dpnm ))
1727
+ f"No mappings using { ', ' .join (map_types ).upper ()} for: { to_resource .path !r} "
1753
1728
)
1754
1729
1730
+ for map_type in map_types :
1731
+ if to_resource .extra_data [f"{ map_type } _not_mapped" ]:
1732
+ to_resource .save ()
1733
+ if logger :
1734
+ logger (
1735
+ f"WARNING: { map_type .upper ()} paths NOT mapped for: { to_resource .path !r} : "
1736
+ + ", " .join (
1737
+ map (repr , to_resource .extra_data [f"{ map_type } _not_mapped" ])
1738
+ )
1739
+ )
1740
+
1755
1741
1756
- def map_paths (project , file_type , collect_paths_func , logger = None ):
1757
- """Map DWARF paths using similarities of path suffixes."""
1742
+ def map_paths (project , file_type , collect_paths_func , map_types , logger = None ):
1743
+ """Map paths using similarities of path suffixes."""
1758
1744
project_files = getattr (project .codebaseresources , file_type )()
1759
1745
from_resources = project_files .from_codebase ()
1760
1746
to_resources = project_files .to_codebase ().has_no_relation ()
@@ -1779,17 +1765,31 @@ def map_paths(project, file_type, collect_paths_func, logger=None):
1779
1765
resource_iterator = to_resources .iterator (chunk_size = 2000 )
1780
1766
progress = LoopProgress (resource_count , logger )
1781
1767
for to_resource in progress .iter (resource_iterator ):
1782
- _map_dwarf_path_resource (
1768
+ _map_paths_resource (
1783
1769
to_resource ,
1784
1770
from_resources ,
1785
1771
from_resources_index ,
1772
+ map_types = map_types ,
1786
1773
logger = logger ,
1787
1774
)
1788
1775
1776
+
1789
1777
def map_elfs (project , logger = None ):
1790
- map_paths (project , "elfs" , get_dwarf_paths , logger )
1778
+ map_paths (
1779
+ project ,
1780
+ "elfs" ,
1781
+ get_dwarf_paths ,
1782
+ ["dwarf_compiled_paths" , "dwarf_included_paths" ],
1783
+ logger ,
1784
+ )
1785
+
1786
+
1787
+ def get_go_file_paths (location ):
1788
+ go_symbols = (
1789
+ collect_and_parse_symbols (location , check_type = False ).get ("go_symbols" ) or {}
1790
+ )
1791
+ return {"file_paths" : go_symbols .get ("file_paths" ) or []}
1791
1792
1792
1793
1793
1794
def map_go_paths (project , logger = None ):
1794
- collect_and_parse_symbols_partial = partial (collect_and_parse_symbols , check_type = False )
1795
- map_paths (project , "executable_binaries" , collect_and_parse_symbols_partial , logger )
1795
+ map_paths (project , "executable_binaries" , get_go_file_paths , ["file_paths" ], logger )
0 commit comments