@@ -1666,78 +1666,155 @@ def _match_purldb_resources_post_process(
1666
1666
return interesting_codebase_resources .count ()
1667
1667
1668
1668
1669
- def _map_paths_resource (
1669
def map_paths_resource(
    to_resource, from_resources, from_resources_index, map_types, logger=None
):
    """
    Map paths found in the ``to_resource`` extra_data to paths of the ``from_resources``
    CodebaseResource queryset using the precomputed ``from_resources_index`` path index.

    For each map type, the paths stored under that key in
    ``to_resource.extra_data`` are handed to ``process_relations`` together with
    a fresh ``<map_type>_not_mapped`` list. The relations collected across all
    map types are then bulk-created, and paths left unmapped are reported.

    Args:
        to_resource (CodebaseResource): The resource to map paths for.
        from_resources (QuerySet): The queryset of from resources.
        from_resources_index (dict): Index of from resources.
        map_types (list): Keys of ``to_resource.extra_data`` holding paths to map.
        logger (callable, optional): Called with each message string. When it
            is ``None`` (the default) nothing is logged.

    Returns:
        None
    """

    def log(message):
        # ``logger`` is optional: calling it unguarded raises TypeError on None.
        if logger:
            logger(message)

    relations = {}

    for map_type in map_types:
        paths = to_resource.extra_data.get(map_type, [])
        # Start from an empty list on every call; process_relations appends to it.
        not_mapped_paths = to_resource.extra_data[f"{map_type}_not_mapped"] = []
        process_relations(
            to_resource,
            from_resources,
            from_resources_index,
            relations,
            map_type,
            paths,
            not_mapped_paths,
        )

    joined_map_types = ", ".join(map_types).upper()
    if relations:
        rels = CodebaseRelation.objects.bulk_create(relations.values())
        log(
            f"Created {len(rels)} mappings using "
            f"{joined_map_types} for: {to_resource.path!r}"
        )
    else:
        log(f"No mappings using {joined_map_types} for: {to_resource.path!r}")

    saved = False
    for map_type in map_types:
        not_mapped = to_resource.extra_data.get(f"{map_type}_not_mapped")
        if not not_mapped:
            continue

        if not saved:
            # extra_data is not modified inside this loop, so saving once
            # is enough even when several map types have unmapped paths.
            to_resource.save()
            saved = True

        log(
            f"WARNING: {map_type.upper()} paths NOT mapped for: "
            f"{to_resource.path!r}: " + ", ".join(map(repr, not_mapped))
        )
1738
1723
1739
1724
1740
- def check_match (match , matched_path_length ):
1725
def process_relations(
    to_resource,
    from_resources,
    from_resources_index,
    relations,
    map_type,
    paths,
    not_mapped_paths,
):
    """
    Collect the relations of one map type for ``to_resource``.

    Each path is looked up in ``from_resources_index`` with
    ``pathmap.find_paths``. A path with no match, or with a match rejected by
    ``is_invalid_match``, is appended to ``not_mapped_paths``. Otherwise the
    first resource returned by ``sort_matched_from_resources`` wins, and a
    ``CodebaseRelation`` instance is stored in ``relations`` unless an entry
    already exists for the same (from path, to path, map type) key.

    Args:
        to_resource (CodebaseResource): The resource to map paths for.
        from_resources (QuerySet): The queryset of from resources.
        from_resources_index (dict): Index of from resources.
        relations (dict): Relations collected so far, updated in place.
        map_type (str): Type of mapping.
        paths (list): List of paths to map.
        not_mapped_paths (list): Paths that could not be mapped, updated in place.

    Returns:
        None
    """
    for candidate_path in paths:
        found = pathmap.find_paths(candidate_path, from_resources_index)
        if not found or is_invalid_match(found, found.matched_path_length):
            not_mapped_paths.append(candidate_path)
            continue

        candidates = sort_matched_from_resources(
            [from_resources.get(id=resource_id) for resource_id in found.resource_ids]
        )
        winner = candidates[0]

        key = (winner.path, to_resource.path, map_type)
        if key in relations:
            # Keep the first relation recorded for this key.
            continue

        # Count of "/" separators once leading and trailing slashes are removed,
        # i.e. the number of path segments minus one.
        separator_count = candidate_path.strip("/").count("/")
        relations[key] = CodebaseRelation(
            project=winner.project,
            from_resource=winner,
            to_resource=to_resource,
            map_type=map_type,
            extra_data={
                "path_score": f"{found.matched_path_length}/{separator_count}",
                map_type: candidate_path,
            },
        )
1784
+
1785
+
1786
def sort_matched_from_resources(matched_from_resources):
    """
    Return the matched from resources ordered from shallowest to deepest path.

    Resources are ranked by the number of "/"-separated segments in their
    ``path`` (leading and trailing slashes ignored), then by the ``path``
    string itself to break ties.

    Args:
        matched_from_resources (list): List of matched CodebaseResource objects.

    Returns:
        list: A new sorted list; the input is left untouched.
    """

    def depth_then_path(resource):
        segments = resource.path.strip("/").split("/")
        return len(segments), resource.path

    ordered = list(matched_from_resources)
    ordered.sort(key=depth_then_path)
    return ordered
1802
+
1803
+
1804
def is_invalid_match(match, matched_path_length):
    """
    Tell whether a path match must be rejected.

    A match is rejected only when a single path segment matched and the match
    does not point to exactly one resource.

    Args:
        match (PathMatch): The path match object; only ``resource_ids`` is read.
        matched_path_length (int): The length of the matched path.

    Returns:
        bool: True if the match is invalid, False otherwise.
    """
    if matched_path_length != 1:
        # Matches longer than one segment are accepted without further checks.
        return False
    return len(match.resource_ids) != 1
1742
1819
1743
1820
@@ -1746,10 +1823,14 @@ def map_paths(project, file_type, collect_paths_func, map_types, logger=None):
1746
1823
from_resources = project .codebaseresources .files ().from_codebase ()
1747
1824
to_resources = project .codebaseresources .files ().to_codebase ().has_no_relation ()
1748
1825
to_resources = getattr (to_resources , file_type )()
1826
+ resource_count = 0
1749
1827
for resource in to_resources :
1750
- paths = collect_paths_func (resource .location_path )
1751
- resource .update_extra_data (paths )
1752
- resource_count = to_resources .count ()
1828
+ try :
1829
+ paths = collect_paths_func (resource .location_path )
1830
+ resource .update_extra_data (paths )
1831
+ resource_count += 1
1832
+ except Exception as e :
1833
+ logger (f"Can not parse { resource .location_path !r} { e !r} " )
1753
1834
1754
1835
if logger :
1755
1836
logger (
@@ -1767,7 +1848,7 @@ def map_paths(project, file_type, collect_paths_func, map_types, logger=None):
1767
1848
resource_iterator = to_resources .iterator (chunk_size = 2000 )
1768
1849
progress = LoopProgress (resource_count , logger )
1769
1850
for to_resource in progress .iter (resource_iterator ):
1770
- _map_paths_resource (
1851
+ map_paths_resource (
1771
1852
to_resource ,
1772
1853
from_resources ,
1773
1854
from_resources_index ,
@@ -1777,6 +1858,18 @@ def map_paths(project, file_type, collect_paths_func, map_types, logger=None):
1777
1858
1778
1859
1779
1860
def map_elfs (project , logger = None ):
1861
+ """
1862
+ Map ELF file paths in a project.
1863
+
1864
+ Args:
1865
+ project (Project): The project to map ELF files for.
1866
+ logger (function, optional): Log messages.
1867
+
1868
+ Returns
1869
+ -------
1870
+ None
1871
+
1872
+ """
1780
1873
map_paths (
1781
1874
project = project ,
1782
1875
file_type = "elfs" ,
@@ -1787,6 +1880,17 @@ def map_elfs(project, logger=None):
1787
1880
1788
1881
1789
1882
def get_elf_file_dwarf_paths (location ):
1883
+ """
1884
+ Retrieve dwarf paths for ELF files.
1885
+
1886
+ Args:
1887
+ location (str): The location of the ELF file.
1888
+
1889
+ Returns
1890
+ -------
1891
+ dict: Dictionary containing dwarf paths.
1892
+
1893
+ """
1790
1894
paths = get_dwarf_paths (location )
1791
1895
return {
1792
1896
"dwarf_compiled_paths" : paths .get ("compiled_paths" ) or [],
@@ -1795,13 +1899,36 @@ def get_elf_file_dwarf_paths(location):
1795
1899
1796
1900
1797
1901
def get_go_file_paths(location):
    """
    Return the Go file paths collected from the file at ``location``.

    The paths are read from the ``file_paths`` entry of the ``go_symbols``
    mapping returned by ``collect_and_parse_symbols``. A missing or empty
    entry at either level yields an empty list.

    Args:
        location (str): The location of the Go file.

    Returns:
        dict: A single ``go_file_paths`` key holding the list of paths.
    """
    parsed = collect_and_parse_symbols(location, check_type=False)
    go_symbols = parsed.get("go_symbols")
    if not go_symbols:
        return {"go_file_paths": []}

    file_paths = go_symbols.get("file_paths")
    return {"go_file_paths": file_paths if file_paths else []}
1802
1917
1803
1918
1804
1919
def map_go_paths (project , logger = None ):
1920
+ """
1921
+ Map Go file paths in a project.
1922
+
1923
+ Args:
1924
+ project (Project): The project to map Go files for.
1925
+ logger (function, optional): Log messages.
1926
+
1927
+ Returns
1928
+ -------
1929
+ None
1930
+
1931
+ """
1805
1932
map_paths (
1806
1933
project = project ,
1807
1934
file_type = "executable_binaries" ,
0 commit comments