41
41
from extractcode import EXTRACT_SUFFIX
42
42
from packagedcode .npm import NpmPackageJsonHandler
43
43
from summarycode .classify import LEGAL_STARTS_ENDS
44
+ from elf_inspector .dwarf import get_dwarf_paths
44
45
45
46
from scanpipe import pipes
46
47
from scanpipe .models import CodebaseRelation
@@ -1662,3 +1663,117 @@ def _match_purldb_resources_post_process(
1662
1663
package .add_resources (unmapped_resources )
1663
1664
1664
1665
return interesting_codebase_resources .count ()
1666
+
1667
+
1668
def _map_dwarf_path_resource(
    to_resource, from_resources, from_resources_index, logger=None,
):
    """
    Map the DWARF paths found in the ``to_resource`` extra_data to resources of
    the ``from_resources`` CodebaseResource queryset using the precomputed
    ``from_resources_index`` path index.

    The ``compiled_paths`` and ``included_paths`` lists are read from
    ``to_resource.extra_data``. Each path that matches the index yields a
    CodebaseRelation from the best matching from/ resource to ``to_resource``
    with a ``dwarf_compiled_paths`` or ``dwarf_included_paths`` map_type. Only
    the first relation for a given (from path, to path, map_type) is kept.

    Paths that cannot be mapped are collected under the
    ``dwarf_paths_not_mapped`` key of ``to_resource.extra_data``; the resource
    is saved only when that list is not empty.

    ``logger`` is an optional callable receiving progress messages as strings.
    """
    compiled_paths = to_resource.extra_data.get("compiled_paths") or []
    included_paths = to_resource.extra_data.get("included_paths") or []
    dwarf_paths_and_map_type = [
        (compiled_paths, "dwarf_compiled_paths"),
        (included_paths, "dwarf_included_paths"),
    ]

    paths_not_mapped = to_resource.extra_data["dwarf_paths_not_mapped"] = []
    # Keyed by (from path, to path, map_type) to avoid duplicated relations.
    relations = {}

    for dwarf_paths, map_type in dwarf_paths_and_map_type:
        for dwarf_path in dwarf_paths:
            match = pathmap.find_paths(dwarf_path, from_resources_index)
            if not match:
                paths_not_mapped.append(dwarf_path)
                continue

            # A short DWARF path matched more than once is treated as not
            # mapped for now.
            matched_path_length = match.matched_path_length
            if matched_path_length == 1 and len(match.resource_ids) != 1:
                paths_not_mapped.append(dwarf_path)
                continue

            # Fetch all the candidates with a single query rather than one
            # query per matched id.
            matched_from_resources = list(
                from_resources.filter(id__in=match.resource_ids)
            )
            if not matched_from_resources:
                # The indexed ids are no longer part of the queryset: there is
                # nothing to relate this path to.
                paths_not_mapped.append(dwarf_path)
                continue

            # Select the match most similar to the From/ side dwarf_path e.g.
            # if we match some/foo/bar/baz.c and this/other/foo/bar/baz.c and
            # the From is that/foo/bar/baz.c, some/foo/bar/baz.c has the most
            # segments matched and wins, e.g., the shortest From/ path wins.
            # The path itself is used as a tie breaker for a stable result.
            winning_from_resource = min(
                matched_from_resources,
                key=lambda res: (len(res.path.strip("/").split("/")), res.path),
            )

            # Do not count the "to/" segment as it is not "matchable":
            # always strip the leading segment ("to" or "from" first segment).
            # NOTE(review): this assumes DWARF paths carry such a leading
            # segment to discount -- confirm.
            dwarf_path_length = len(dwarf_path.strip("/").split("/")) - 1

            rel_key = (winning_from_resource.path, to_resource.path, map_type)
            if rel_key in relations:
                continue

            relations[rel_key] = CodebaseRelation(
                project=winning_from_resource.project,
                from_resource=winning_from_resource,
                to_resource=to_resource,
                map_type=map_type,
                extra_data={
                    "path_score": f"{matched_path_length}/{dwarf_path_length}",
                    "dwarf_path": dwarf_path,
                },
            )

    if relations:
        rels = CodebaseRelation.objects.bulk_create(relations.values())
        if logger:
            logger(
                f"Created {len(rels)} mapping using DWARF for: "
                f"{to_resource.path!r}"
            )
    else:
        if logger:
            logger(f"No mapping using DWARF for: {to_resource.path!r}")

    if paths_not_mapped:
        # Save the "dwarf_paths_not_mapped" list stored in the extra_data.
        to_resource.save()
        if logger:
            logger(
                f"WARNING: DWARF paths NOT mapped for: {to_resource.path!r}: "
                + ", ".join(map(repr, paths_not_mapped))
            )
1744
+
1745
+
1746
def map_elf(project, logger=None):
    """
    Map DWARF paths using similarities of path suffixes.

    Candidate resources are the ``project`` codebase resources selected with
    ``elfs().no_status()``. The DWARF paths of each to/ candidate without a
    relation are collected with ``get_dwarf_paths`` and handed to
    ``update_extra_data``. A path index is then built from the from/
    candidates, and each to/ resource is mapped with
    ``_map_dwarf_path_resource``.

    ``logger`` is an optional callable receiving progress messages as strings.
    """
    elf_resources = project.codebaseresources.elfs().no_status()
    from_resources = elf_resources.from_codebase()
    to_resources = elf_resources.to_codebase().has_no_relation()

    # First pass: collect the DWARF paths of every to/ resource.
    for elf_resource in to_resources:
        elf_resource.update_extra_data(get_dwarf_paths(elf_resource.location_path))

    resource_count = to_resources.count()

    if logger:
        from_count = from_resources.count()
        message = (
            f"Mapping {resource_count:,d} to/ resources using DWARF paths "
            f"with {from_count:,d} from/ resources."
        )
        logger(message)

    ids_and_paths = from_resources.values_list("id", "path")
    from_resources_index = pathmap.build_index(ids_and_paths, with_subpaths=True)

    if logger:
        logger("Done building from/ resources index.")

    # Second pass: map each to/ resource, reading them in chunks.
    progress = LoopProgress(resource_count, logger)
    for to_resource in progress.iter(to_resources.iterator(chunk_size=2000)):
        _map_dwarf_path_resource(
            to_resource=to_resource,
            from_resources=from_resources,
            from_resources_index=from_resources_index,
            logger=logger,
        )
0 commit comments