Skip to content

Commit debf332

Browse files
Add d2d symbols matching for winpe macho binaries (#1674)
* Add d2d symbols matching for winpe macho binaries Reference: #1431 Reference: #1432 Reference: #1433 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Use newly released source-inspector v0.6.0 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Bump binary-inspector to v0.1.2 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Add test as examples for macho/winpe symbol matching Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> --------- Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 5f21555 commit debf332

File tree

10 files changed

+144
-1
lines changed

10 files changed

+144
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ local
5151
*.rdb
5252
*.aof
5353
.vscode
54+
.ipynb_checkpoints
5455

5556
# This is only created when packaging for external redistribution
5657
/thirdparty/

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ def steps(cls):
7373
cls.map_javascript,
7474
cls.map_javascript_symbols,
7575
cls.map_elf,
76+
cls.map_macho,
77+
cls.map_winpe,
7678
cls.map_go,
7779
cls.map_rust,
7880
cls.match_directories_to_purldb,
@@ -208,6 +210,16 @@ def map_elf(self):
208210
d2d.map_elfs_with_dwarf_paths(project=self.project, logger=self.log)
209211
d2d.map_elfs_binaries_with_symbols(project=self.project, logger=self.log)
210212

213+
@optional_step("MacOS")
214+
def map_macho(self):
215+
"""
Map Mach-O binaries to their sources using symbols.

Delegate to ``d2d.map_macho_binaries_with_symbols`` using ``self.project``
as the project and ``self.log`` as the logger.
"""
216+
d2d.map_macho_binaries_with_symbols(project=self.project, logger=self.log)
217+
218+
@optional_step("Windows")
219+
def map_winpe(self):
220+
"""
Map winpe binaries to their sources using symbols.

Delegate to ``d2d.map_winpe_binaries_with_symbols`` using ``self.project``
as the project and ``self.log`` as the logger.
"""
221+
d2d.map_winpe_binaries_with_symbols(project=self.project, logger=self.log)
222+
211223
@optional_step("Go")
212224
def map_go(self):
213225
"""Map Go binaries to their sources using paths."""

scanpipe/pipes/d2d.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
from django.db.models.functions import Concat
3939
from django.template.defaultfilters import pluralize
4040

41+
from binary_inspector.binary import collect_and_parse_macho_symbols
42+
from binary_inspector.binary import collect_and_parse_winpe_symbols
4143
from commoncode.paths import common_prefix
4244
from elf_inspector.binary import collect_and_parse_elf_symbols
4345
from elf_inspector.dwarf import get_dwarf_paths
@@ -1942,7 +1944,7 @@ def map_elfs_binaries_with_symbols(project, logger=None):
19421944
project.codebaseresources.files().to_codebase().has_no_relation().elfs()
19431945
)
19441946

1945-
# Collect source symbols from rust source files
1947+
# Collect source symbols from elf related source files
19461948
elf_from_resources = from_resources.filter(extension__in=[".c", ".cpp", ".h"])
19471949

19481950
map_binaries_with_symbols(
@@ -1955,6 +1957,53 @@ def map_elfs_binaries_with_symbols(project, logger=None):
19551957
)
19561958

19571959

1960+
def map_macho_binaries_with_symbols(project, logger=None):
    """
    Map the Mach-O binaries of ``project`` to their sources using symbols.

    Binaries are the "to" codebase files without any relation that are
    selected by the ``macho_binaries()`` queryset method. Sources are the
    "from" codebase files whose extension is one of ``.c``, ``.cpp``, ``.h``,
    ``.m`` or ``.swift``. The matching itself is delegated to
    ``map_binaries_with_symbols`` with the "macho_symbols" map type, and
    ``logger`` is passed through unchanged.
    """
    # Extensions of the macOS related source files symbols are collected from.
    source_extensions = [".c", ".cpp", ".h", ".m", ".swift"]

    candidate_sources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__in=source_extensions)
    )
    unmapped_binaries = (
        project.codebaseresources.files()
        .to_codebase()
        .has_no_relation()
        .macho_binaries()
    )

    map_binaries_with_symbols(
        project=project,
        from_resources=candidate_sources,
        to_resources=unmapped_binaries,
        binary_symbols_func=collect_and_parse_macho_symbols,
        map_type="macho_symbols",
        logger=logger,
    )
1983+
1984+
1985+
def map_winpe_binaries_with_symbols(project, logger=None):
    """
    Map the winpe binaries of ``project`` to their sources using symbols.

    Binaries are the "to" codebase files without any relation that are
    selected by the ``win_exes()`` queryset method. Sources are the "from"
    codebase files whose extension is one of ``.c``, ``.cpp``, ``.h`` or
    ``.cs``. The matching itself is delegated to
    ``map_binaries_with_symbols`` with the "winpe_symbols" map type, and
    ``logger`` is passed through unchanged.
    """
    # Extensions of the windows related source files symbols are collected from.
    source_extensions = [".c", ".cpp", ".h", ".cs"]

    candidate_sources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__in=source_extensions)
    )
    unmapped_binaries = (
        project.codebaseresources.files()
        .to_codebase()
        .has_no_relation()
        .win_exes()
    )

    map_binaries_with_symbols(
        project=project,
        from_resources=candidate_sources,
        to_resources=unmapped_binaries,
        binary_symbols_func=collect_and_parse_winpe_symbols,
        map_type="winpe_symbols",
        logger=logger,
    )
2005+
2006+
19582007
def map_binaries_with_symbols(
19592008
project,
19602009
from_resources,

scanpipe/pipes/symbolmap.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
SMALL_FILE_SYMBOLS_THRESHOLD = 20
4242
MATCHING_RATIO_RUST_SMALL_FILE = 0.4
4343
MATCHING_RATIO_ELF = 0.05
44+
# Thresholds for the "macho_symbols" and "winpe_symbols" map types: a source
# file is reported as matching a binary when its matched symbols ratio, or its
# unique matched symbols ratio, is strictly greater than the value below.
MATCHING_RATIO_MACHO = 0.15
45+
MATCHING_RATIO_WINPE = 0.15
4446
MATCHING_RATIO_JAVASCRIPT = 0.7
4547
SMALL_FILE_SYMBOLS_THRESHOLD_JAVASCRIPT = 30
4648
MATCHING_RATIO_JAVASCRIPT_SMALL_FILE = 0.5
@@ -209,6 +211,22 @@ def match_source_symbols_to_binary(source_symbols, binary_symbols, map_type):
209211
return True, stats
210212
else:
211213
return False, stats
214+
elif map_type == "macho_symbols":
215+
if (
216+
matched_symbols_ratio > MATCHING_RATIO_MACHO
217+
or matched_symbols_unique_ratio > MATCHING_RATIO_MACHO
218+
):
219+
return True, stats
220+
else:
221+
return False, stats
222+
elif map_type == "winpe_symbols":
223+
if (
224+
matched_symbols_ratio > MATCHING_RATIO_WINPE
225+
or matched_symbols_unique_ratio > MATCHING_RATIO_WINPE
226+
):
227+
return True, stats
228+
else:
229+
return False, stats
212230

213231

214232
def match_source_paths_to_binary(
22.8 KB
Binary file not shown.
44.8 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.

scanpipe/tests/pipes/test_d2d.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,6 +1599,68 @@ def test_scanpipe_pipes_d2d_map_elf_symbols(self):
15991599
).count(),
16001600
)
16011601

1602+
@skipIf(sys.platform == "darwin", "Test is failing on macOS")
1603+
def test_scanpipe_pipes_d2d_map_macho_symbols(self):
1604+
# Runs the macho symbols mapping on the "lumen" from/to sample archives and
# checks the number of "macho_symbols" relations created.
# NOTE(review): skipped on darwin per the decorator; the failure cause is
# not recorded here -- confirm and link an issue.
input_dir = self.project1.input_path
1605+
input_resources = [
1606+
self.data / "d2d-macho/from-lumen.zip",
1607+
self.data / "d2d-macho/to-lumen.zip",
1608+
]
1609+
copy_inputs(input_resources, input_dir)
1610+
# presumably splits the project inputs on their "from-"/"to-" filename
# prefix -- verify against d2d.get_inputs
self.from_files, self.to_files = d2d.get_inputs(self.project1)
1611+
# Pair each group of input archives with its extraction directory.
inputs_with_codebase_path_destination = [
1612+
(self.from_files, self.project1.codebase_path / d2d.FROM),
1613+
(self.to_files, self.project1.codebase_path / d2d.TO),
1614+
]
1615+
for input_files, codebase_path in inputs_with_codebase_path_destination:
1616+
for input_file_path in input_files:
1617+
scancode.extract_archive(input_file_path, codebase_path)
1618+
1619+
# Second extraction pass over the whole codebase directory with recurse=True.
scancode.extract_archives(
1620+
self.project1.codebase_path,
1621+
recurse=True,
1622+
)
1623+
pipes.collect_and_create_codebase_resources(self.project1)
1624+
# Log messages are written to an in-memory buffer; it is not asserted on.
buffer = io.StringIO()
1625+
d2d.map_macho_binaries_with_symbols(project=self.project1, logger=buffer.write)
1626+
# Expected count is tied to the content of the sample archives.
self.assertEqual(
1627+
9,
1628+
CodebaseRelation.objects.filter(
1629+
project=self.project1, map_type="macho_symbols"
1630+
).count(),
1631+
)
1632+
1633+
@skipIf(sys.platform == "darwin", "Test is failing on macOS")
1634+
def test_scanpipe_pipes_d2d_map_winpe_symbols(self):
1635+
# Runs the winpe symbols mapping on the "translucent" from/to sample archives
# and checks the number of "winpe_symbols" relations created.
# NOTE(review): skipped on darwin per the decorator; the failure cause is
# not recorded here -- confirm the skip is needed for winpe inputs too.
input_dir = self.project1.input_path
1636+
input_resources = [
1637+
self.data / "d2d-winpe/to-translucent.zip",
1638+
self.data / "d2d-winpe/from-translucent.zip",
1639+
]
1640+
copy_inputs(input_resources, input_dir)
1641+
# presumably splits the project inputs on their "from-"/"to-" filename
# prefix -- verify against d2d.get_inputs
self.from_files, self.to_files = d2d.get_inputs(self.project1)
1642+
# Pair each group of input archives with its extraction directory.
inputs_with_codebase_path_destination = [
1643+
(self.from_files, self.project1.codebase_path / d2d.FROM),
1644+
(self.to_files, self.project1.codebase_path / d2d.TO),
1645+
]
1646+
for input_files, codebase_path in inputs_with_codebase_path_destination:
1647+
for input_file_path in input_files:
1648+
scancode.extract_archive(input_file_path, codebase_path)
1649+
1650+
# Second extraction pass over the whole codebase directory with recurse=True.
scancode.extract_archives(
1651+
self.project1.codebase_path,
1652+
recurse=True,
1653+
)
1654+
pipes.collect_and_create_codebase_resources(self.project1)
1655+
# Log messages are written to an in-memory buffer; it is not asserted on.
buffer = io.StringIO()
1656+
d2d.map_winpe_binaries_with_symbols(project=self.project1, logger=buffer.write)
1657+
# Expected count is tied to the content of the sample archives.
self.assertEqual(
1658+
4,
1659+
CodebaseRelation.objects.filter(
1660+
project=self.project1, map_type="winpe_symbols"
1661+
).count(),
1662+
)
1663+
16021664
@mock.patch("scanpipe.pipes.purldb.match_resources")
16031665
def test_scanpipe_pipes_d2d_match_purldb_resource_no_package_data(
16041666
self, mock_match_resource

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ install_requires =
8484
elf-inspector==0.0.3
8585
go-inspector==0.5.0
8686
rust-inspector==0.1.0
87+
binary-inspector==0.1.2
8788
python-inspector==0.14.0
8889
source-inspector==0.6.1; sys_platform != "darwin" and platform_machine != "arm64"
8990
aboutcode-toolkit==11.1.1

0 commit comments

Comments
 (0)