Skip to content

Commit c9122bd

Browse files
Match binaries to source using elf symbols (#1621)
* Map ELF binaries to sources using symbols
* Refactor symbol matching to debug match quality
* Use elf-inspector v0.0.3
* Improve test coverage for ELF symbols matching

Reference: #1403
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 15a3438 commit c9122bd

File tree

11 files changed

+4323
-67
lines changed

11 files changed

+4323
-67
lines changed

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,9 @@ def map_javascript_symbols(self):
204204

205205
@optional_step("Elf")
def map_elf(self):
    """
    Map ELF binaries to their sources.

    Two mapping passes are run in sequence on the pipeline project: the
    DWARF-paths based mapping first, then the symbols based mapping.
    """
    # Both passes receive the same project and logger.
    shared_kwargs = {"project": self.project, "logger": self.log}
    d2d.map_elfs_with_dwarf_paths(**shared_kwargs)
    d2d.map_elfs_binaries_with_symbols(**shared_kwargs)
209210

210211
@optional_step("Go")
211212
def map_go(self):
@@ -215,7 +216,7 @@ def map_go(self):
215216
@optional_step("Rust")
def map_rust(self):
    """Run the symbols based mapping of Rust binaries to their sources."""
    project, log = self.project, self.log
    d2d.map_rust_binaries_with_symbols(project=project, logger=log)
219220

220221
def match_directories_to_purldb(self):
221222
"""Match selected directories in PurlDB."""

scanpipe/pipes/d2d.py

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from django.template.defaultfilters import pluralize
4040

4141
from commoncode.paths import common_prefix
42+
from elf_inspector.binary import collect_and_parse_elf_symbols
4243
from elf_inspector.dwarf import get_dwarf_paths
4344
from extractcode import EXTRACT_SUFFIX
4445
from go_inspector.plugin import collect_and_parse_symbols
@@ -1708,11 +1709,11 @@ def map_paths_resource(
17081709
relations_to_create[rel_key] = relation
17091710
if paths_not_mapped:
17101711
to_resource.status = flag.REQUIRES_REVIEW
1711-
to_resource.save()
17121712
logger(
17131713
f"WARNING: #{len(paths_not_mapped)} {map_type} paths NOT mapped for: "
17141714
f"{to_resource.path!r}"
17151715
)
1716+
to_resource.save()
17161717

17171718
if relations_to_create:
17181719
rels = CodebaseRelation.objects.bulk_create(relations_to_create.values())
@@ -1795,7 +1796,7 @@ def is_invalid_match(match, matched_path_length):
17951796
return matched_path_length == 1 and len(match.resource_ids) != 1
17961797

17971798

1798-
def map_elfs(project, logger=None):
1799+
def map_elfs_with_dwarf_paths(project, logger=None):
17991800
"""Map ELF binaries to their sources in ``project``."""
18001801
from_resources = project.codebaseresources.files().from_codebase()
18011802
to_resources = (
@@ -1911,10 +1912,10 @@ def map_go_paths(project, logger=None):
19111912
)
19121913

19131914

1914-
def map_rust_paths(project, logger=None):
1915-
"""Map Rust binaries to their source in ``project``."""
1915+
def map_rust_binaries_with_symbols(project, logger=None):
1916+
"""Map Rust binaries to their source using symbols in ``project``."""
19161917
from_resources = project.codebaseresources.files().from_codebase()
1917-
to_resources = (
1918+
to_binaries = (
19181919
project.codebaseresources.files()
19191920
.to_codebase()
19201921
.has_no_relation()
@@ -1923,41 +1924,82 @@ def map_rust_paths(project, logger=None):
19231924

19241925
# Collect source symbols from rust source files
19251926
rust_from_resources = from_resources.filter(extension=".rs")
1927+
1928+
map_binaries_with_symbols(
1929+
project=project,
1930+
from_resources=rust_from_resources,
1931+
to_resources=to_binaries,
1932+
binary_symbols_func=collect_and_parse_rust_symbols,
1933+
map_type="rust_symbols",
1934+
logger=logger,
1935+
)
1936+
1937+
1938+
def map_elfs_binaries_with_symbols(project, logger=None):
    """
    Map ELF binaries to their sources using symbols in ``project``.

    The to/ side is restricted to ELF files that have no relation yet, and
    the from/ side to files with a ``.c``, ``.cpp`` or ``.h`` extension. The
    actual matching is delegated to ``map_binaries_with_symbols`` with the
    ``elf_symbols`` map type.
    """
    source_extensions = [".c", ".cpp", ".h"]

    # from/ side: C and C++ sources and headers used as symbol providers.
    c_family_sources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__in=source_extensions)
    )

    # to/ side: ELF binaries that are not mapped to anything yet.
    unmapped_elfs = (
        project.codebaseresources.files()
        .to_codebase()
        .has_no_relation()
        .elfs()
    )

    map_binaries_with_symbols(
        project=project,
        from_resources=c_family_sources,
        to_resources=unmapped_elfs,
        binary_symbols_func=collect_and_parse_elf_symbols,
        map_type="elf_symbols",
        logger=logger,
    )
1956+
1957+
1958+
def map_binaries_with_symbols(
    project,
    from_resources,
    to_resources,
    binary_symbols_func,
    map_type,
    logger=None,
):
    """
    Map binaries to their sources using symbols in ``project``.

    The mapping runs in three steps:

    1. Source symbols and strings are collected and stored for
       ``from_resources``.
    2. ``binary_symbols_func`` is called with the location of each resource in
       ``to_resources`` and its result is stored in the resource extra data.
       A failure on one binary is reported to ``logger`` (when provided) and
       does not stop the processing of the other binaries.
    3. Each to/ resource with a non-empty ``map_type`` entry in its extra data
       is mapped against ``from_resources`` with
       ``symbolmap.map_resources_with_symbols``.

    ``map_type`` is both the extra data key read in step 3 and the map type
    forwarded to the symbol mapper. ``logger`` is an optional callable taking
    a message string.
    """
    symbols.collect_and_store_tree_sitter_symbols_and_strings(
        project=project,
        logger=logger,
        project_files=from_resources,
    )

    # Collect the symbols of each binary and store them as extra data.
    # NOTE(review): presumably ``binary_symbols_func`` returns a mapping keyed
    # by ``map_type``, since that key is read back below; confirm.
    for resource in to_resources:
        try:
            binary_symbols = binary_symbols_func(resource.location)
            resource.update_extra_data(binary_symbols)
        except Exception as e:
            # Best effort: a binary that cannot be parsed is skipped.
            # ``logger`` defaults to None and must not be called blindly.
            if logger:
                logger(
                    f"Error parsing binary symbols at: "
                    f"{resource.location_path!r} {e!r}"
                )

    if logger:
        logger(
            f"Mapping {to_resources.count():,d} to/ resources using symbols "
            f"with {from_resources.count():,d} from/ resources."
        )

    resource_iterator = to_resources.iterator(chunk_size=2000)
    progress = LoopProgress(to_resources.count(), logger)
    for to_resource in progress.iter(resource_iterator):
        binary_symbols = to_resource.extra_data.get(map_type)
        # Nothing to map when no symbols were stored for this binary.
        if not binary_symbols:
            continue

        if logger:
            logger(f"Mapping source files to binary at {to_resource.path}")

        symbolmap.map_resources_with_symbols(
            project=project,
            to_resource=to_resource,
            from_resources=from_resources,
            binary_symbols=binary_symbols,
            map_type=map_type,
            logger=logger,
        )
19632005

0 commit comments

Comments
 (0)