Skip to content

Replacing target with a checkpoint system #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions fuzzware_pipeline/naming_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@
MEM_ACCESS_MODE_READ = "r"
MEM_ACCESS_MODE_WRITE = "w"

# Config keys for the checkpoint system.
# CONFIG_ENTRY_CATEGORY_CHECKPOINTS names the section of the pipeline config
# that holds the checkpoints; the remaining names are the keys read from each
# individual checkpoint entry.
CONFIG_ENTRY_CATEGORY_CHECKPOINTS = 'checkpoints'
# basic blocks that must have been visited for the checkpoint to count
CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED = 'required'
# alternative spelling of the avoid list; consulted only when 'avoid' is
# missing or empty
CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED = 'blacklisted'
# basic blocks that must NOT have been visited for the checkpoint to count
CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID = 'avoid'
# address of the checkpoint itself; mandatory for every checkpoint entry
CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET = 'target'

# the boot naming
CONFIG_ENTRY_CATEGORY_BOOT = 'boot'
CONFIG_ENTRY_NAME_BOOT_REQUIRED = 'required'
CONFIG_ENTRY_NAME_BOOT_BLACKLISTED = 'blacklisted'
Expand Down
94 changes: 90 additions & 4 deletions fuzzware_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ class Pipeline:
boot_avoided_bbls: set
groundtruth_valid_basic_blocks: set
groundtruth_milestone_basic_blocks: set
# checkpoint logic
checkpoints: dict
current_checkpoint: dict
current_checkpoint_name: str

# Runtime state
start_time: int
Expand Down Expand Up @@ -201,8 +205,12 @@ def check_emulator_dry(self):
exit(1)
logger.info("Emulator dry-run successful!")
os.remove(dry_input)

def parse_pipeline_yml_config(self, full_config):
    """Run every pipeline-level config parser on the loaded config.

    The boot section is handled first and the checkpoint section second;
    both parsers receive the same full_config object.
    """
    section_parsers = (
        self.parse_pipeline_boot_config,
        self.parse_pipeline_checkpoint_config,
    )
    for parse_section in section_parsers:
        parse_section(full_config)

def parse_pipeline_boot_config(self, full_config):
self.boot_avoided_bbls = set()
self.boot_required_bbls = set()
boot_config = full_config.get(CONFIG_ENTRY_CATEGORY_BOOT)
Expand All @@ -220,6 +228,49 @@ def parse_pipeline_yml_config(self, full_config):
logger.debug("Avoid list: " + " ".join([hex(addr) for addr in self.boot_avoided_bbls]))
logger.debug("Required: " + " ".join([hex(addr) for addr in self.boot_required_bbls]))

def parse_pipeline_checkpoint_config(self, full_config):
    """Load the optional checkpoint section of the pipeline config.

    When the section is missing or empty nothing is changed. Otherwise:

    * self.checkpoints maps each checkpoint name to its parsed form (see
      parse_single_checkpoint), in the order given in the config.
    * self.current_checkpoint / self.current_checkpoint_name are set to the
      first checkpoint, i.e. the next one that has to be reached.
    * The last checkpoint takes over the boot criteria: its non-empty
      required/avoided sets replace self.boot_required_bbls and
      self.boot_avoided_bbls, and its target becomes self.booted_bbl
      unless a non-default booted_bbl is already set.
    """
    checkpoint_config = full_config.get(CONFIG_ENTRY_CATEGORY_CHECKPOINTS)
    if not checkpoint_config:
        # No checkpoints configured: leave all state untouched
        return

    # dicts preserve insertion order, so checkpoints stay in config order
    self.checkpoints = {
        name: self.parse_single_checkpoint(raw_checkpoint)
        for name, raw_checkpoint in checkpoint_config.items()
    }
    checkpoint_names = list(self.checkpoints)

    # The current checkpoint always holds the next checkpoint to reach
    self.current_checkpoint_name = checkpoint_names[0]
    self.current_checkpoint = self.checkpoints[self.current_checkpoint_name]

    # Reaching the last checkpoint is what counts as a successful boot
    last_checkpoint = self.checkpoints[checkpoint_names[-1]]
    if last_checkpoint["required_bbls"]:
        self.boot_required_bbls = last_checkpoint["required_bbls"]
    if last_checkpoint["avoided_bbls"]:
        # The avoid set belongs in boot_avoided_bbls; writing it to
        # boot_required_bbls would clobber the required set assigned above
        self.boot_avoided_bbls = last_checkpoint["avoided_bbls"]
    if self.booted_bbl == DEFAULT_IDLE_BBL:
        self.booted_bbl = last_checkpoint["checkpoint_target"]

def parse_single_checkpoint(self, checkpoint_config):
    """Translate one raw checkpoint entry into the dict used by the pipeline.

    The result always carries three keys: "required_bbls" and
    "avoided_bbls" (sets, empty when the entry lists none) and
    "checkpoint_target". The avoid list is read from the 'avoid' key and
    falls back to the 'blacklisted' key when 'avoid' is missing or empty.
    Every address is resolved via parse_address_value against self.symbols
    and has bit 0 cleared.

    Raises KeyError when the entry has no target, which is mandatory.
    """
    def resolve(raw_value):
        # Clear the lowest address bit (presumably the Thumb bit - confirm)
        return parse_address_value(self.symbols, raw_value) & (~1)

    required_raw = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED)
    required = {resolve(entry) for entry in required_raw} if required_raw else set()

    avoided_raw = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID)
    if not avoided_raw:
        avoided_raw = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED)
    avoided = {resolve(entry) for entry in avoided_raw} if avoided_raw else set()

    # The target is mandatory, so it is indexed directly without a default
    target = resolve(checkpoint_config[CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET])

    return {
        "required_bbls": required,
        "avoided_bbls": avoided,
        "checkpoint_target": target,
    }

def parse_ground_truth_files(self):
valid_bb_list_path = self.valid_basic_block_list_path
if os.path.exists(valid_bb_list_path):
Expand All @@ -231,6 +282,9 @@ def parse_ground_truth_files(self):

def __init__(self, parent_dir, name, base_inputs, num_main_fuzzer_procs, disable_modeling=False, write_worker_logs=False, do_full_tracing=False, config_name=SESS_FILENAME_CONFIG, timeout_seconds=0, use_aflpp=False):
self.booted_bbl = DEFAULT_IDLE_BBL
self.checkpoints = {}
self.current_checkpoint = {}
self.current_checkpoint_name = ""
self.disable_modeling = disable_modeling
self.shutdown_requested = False
self.sessions = {}
Expand Down Expand Up @@ -430,6 +484,29 @@ def is_successfully_booted(self, bbl_set):
(not self.boot_required_bbls - bbl_set)
)

# this checks if the currently defined checkpoint is hit
def checkpoint_progress(self, bbl_set):
    """Return True if the trace in bbl_set reaches the current checkpoint.

    The checkpoint counts as reached when its target is in bbl_set, none of
    its avoided basic blocks are in bbl_set and all of its required basic
    blocks are. Without a current checkpoint the answer is always False.

    bbl_set: set of visited basic block addresses.
    Returns: bool (never the checkpoint dict itself).
    """
    checkpoint = self.current_checkpoint
    if not checkpoint:
        # No checkpoints configured, so there is nothing to make progress on
        return False
    # Did we find our checkpoint?
    if checkpoint["checkpoint_target"] not in bbl_set:
        return False
    # Any avoided (blacklisted) address in the trace disqualifies it
    if checkpoint["avoided_bbls"] & bbl_set:
        return False
    # All required (whitelisted) addresses must have been visited
    return checkpoint["required_bbls"] <= bbl_set

# set all the fields to the next checkpoint
def update_checkpoint(self):
    """Advance current_checkpoint / current_checkpoint_name by one step.

    Does nothing when no checkpoints are configured or when the current
    checkpoint is already the last one. The order is the insertion order of
    self.checkpoints.
    """
    if not self.checkpoints:
        return

    ordered_names = list(self.checkpoints)
    next_position = ordered_names.index(self.current_checkpoint_name) + 1
    if next_position >= len(ordered_names):
        # Already at the last checkpoint: keep pointing at it
        return

    upcoming_name = ordered_names[next_position]
    self.current_checkpoint_name = upcoming_name
    self.current_checkpoint = self.checkpoints[upcoming_name]

def choose_next_session_inputs(self, config_map):
"""
Determines different sets of input file paths, ordered by desirability
Expand Down Expand Up @@ -528,15 +605,15 @@ def add_main_session(self, prefix_input_candidate=None):

# Try different sets of inputs in order of quality
start_success = False
for input_path_list in self.choose_next_session_inputs(config_map):
input_paths = self.choose_next_session_inputs(config_map)
for input_path_list in input_paths:
# We have previous inputs, carry them over
logger.debug("Copying over {} inputs".format(len(input_path_list)))

new_sess_inputs_dir = self.curr_main_session.base_input_dir
os.mkdir(new_sess_inputs_dir)
for path in input_path_list:
shutil.copy2(path, new_sess_inputs_dir)

self.curr_main_session.minimize_inputs(prefix_candidate_path=prefix_input_candidate, is_previously_used_prefix=is_previously_used_prefix)
# Try the inputs
if self.curr_main_session.start_fuzzers():
Expand Down Expand Up @@ -664,12 +741,21 @@ def handle_queue_forever(self):
logger.info(f"Discovered milestone basic block: 0x{pc:08x}{sym_suffix}")
self.visited_milestone_basic_blocks.add(pc)
self.visited_translation_blocks |= new_bbs

# if this is hit, we are done with our checkpoints!
if (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.is_successfully_booted(bbl_set):
logger.info("FOUND MAIN ADDRESS for trace file: '{}'".format(trace_filename))
pending_prefix_candidate = input_for_trace_path(trace_file_path)
restart_pending = True
self.curr_main_session.kill_fuzzers()
# if not, we need to check if we hit one of our checkpoints
elif (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.checkpoint_progress(bbl_set):
# we found our checkpoint and fulfilled avoid/visit conditions
logger.info("FOUND CHECKPOINT {} for trace file: '{}'".format(self.current_checkpoint_name, trace_filename))
# set our current input as prefix and restart
pending_prefix_candidate = input_for_trace_path(trace_file_path)
restart_pending = True
# I think we cannot update our checkpoint here because it is still needed for prefix size computation
self.curr_main_session.kill_fuzzers()

logger.debug("Looking at new MMIO access set")
# For every new mmio access trace we get, trigger state generation for unique pc/mmio_addr pairs
Expand Down
2 changes: 1 addition & 1 deletion fuzzware_pipeline/run_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal
logger.debug("Full command: {}".format(" ".join(arg_list)))

if get_output:
return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
return subprocess.call(arg_list, stdout=stdout, stderr=stderr)
59 changes: 39 additions & 20 deletions fuzzware_pipeline/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,20 +136,27 @@ def start_fuzzer(self, fuzzer_num):
self.fuzzers.append(fuzzer)
return fuzzer.start(silent=True)

def get_booting_prefix_size(self, input_path):
def get_progress_prefix_size(self, input_path):
"""
For an input file located at input_path, find the prefix size required to reach successful boot.

If booting successful, returns the size of the input prefix.
Otherwise, returns None
"""
gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)])
checkpoint_target = self.parent.current_checkpoint["checkpoint_target"]
gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)])
bbl_set = set(parse_bbl_set(self.temp_bbl_set_path))
if not self.parent.is_successfully_booted(bbl_set):
# did we find the last checkpoint?
checkpoints_done = self.parent.is_successfully_booted(bbl_set);
# did we find our current checkpoint?
checkpoint_progress = self.parent.checkpoint_progress(bbl_set);
# if neither happened, we do not have an interesting prefix
if not (checkpoints_done or checkpoint_progress):
return None

prefix_size = None
for _, _, _, mode, _, access_fuzz_ind, num_consumed_fuzz_bytes, _, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]:
# count all the consumptions
for evt_id, pc, lr, mode, access_size, access_fuzz_ind, num_consumed_fuzz_bytes, address, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]:
if mode == "r":
prefix_size = access_fuzz_ind + num_consumed_fuzz_bytes
break
Expand All @@ -161,13 +168,14 @@ def get_booting_prefix_size(self, input_path):
# Try expanding input and re-running for a number of times
for _ in range(16):
copy_prefix_to(self.temp_prefix_input_path, input_path, prefix_size)
gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)])
# gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)])
checkpoint_target = self.parent.current_checkpoint["checkpoint_target"]
gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)])
bbl_set = set(parse_bbl_set(self.temp_bbl_set_path))

if self.parent.is_successfully_booted(bbl_set):
# if we are done with our checkpoints or reached our current checkpoint, return the prefix size
if self.parent.is_successfully_booted(bbl_set) or self.parent.checkpoint_progress(bbl_set):
return prefix_size
prefix_size += 1

return None

def emulator_args(self):
Expand Down Expand Up @@ -197,25 +205,36 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl

# Handle cases where prefix candidate is passed
if prefix_candidate_path:
booting_prefix_size = self.get_booting_prefix_size(prefix_candidate_path)
is_booted_successfully = booting_prefix_size is not None
# this returns none when we do not have a prefix
# if it is not none, the current checkpoint is no longer needed
progress_prefix_size = self.get_progress_prefix_size(prefix_candidate_path)
did_some_progress = progress_prefix_size is not None
if is_previously_used_prefix:
if is_booted_successfully:
if did_some_progress:
# the old prefix has had its update processed already
# technically, this cannot happen. An old prefix cannot progress
# further in the checkpoints, can it?
# self.parent.update_checkpoint()
# A previously booting prefix still boots.
# Set the booting prefix and prepend remainder to input files
self.save_prefix_input(prefix_candidate_path, booting_prefix_size)
prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=booting_prefix_size)
self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=progress_prefix_size)
else:
# The input no longer successfully boots the image
# the prefix did not make progress. Still, keep it as we need it to reach the next prefix
# Attach the no longer booting prefix to input files and minimize without prefix
self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
prepend_to_all(self.base_input_dir, prefix_candidate_path)
else:
if is_booted_successfully:
if did_some_progress:
# Update the checkpoint since we are not none
# does not matter if we are the last checkpoint
# in this case, update checkpoint does nothing
self.parent.update_checkpoint()
# A brand new booting input was discovered, use it as new input prefix and reset to generic inputs
# extract prefix from input, copy over generic base inputs
shutil.rmtree(self.base_input_dir)
shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir)
self.save_prefix_input(prefix_candidate_path, booting_prefix_size)
self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
# No minimization or input corpus adjustment required in this case, return
return
else:
Expand All @@ -232,9 +251,9 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
self.parent.add_warning_line("Minimization for fuzzing session '{}' had no inputs remaining, copying generic inputs.".format(self.name))
shutil.rmtree(self.base_input_dir, True)
shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir)
except subprocess.CalledProcessError:
except subprocess.CalledProcessError as e:
self.parent.add_warning_line("Minimization for fuzzing session '{}' failed, copying full inputs.".format(self.name))

# In case minimization does not work out, copy all inputs
shutil.rmtree(self.base_input_dir, True)
shutil.copytree(self.temp_minimization_dir, self.base_input_dir)
Expand Down Expand Up @@ -291,11 +310,11 @@ def start_fuzzers(self):
logger.warning("[TRIAGING STEP 1] ... Output end")

logger.warning("\n\n[TRIAGING STEP 2] Re-running single emulation run, showing its output...")
run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ])
run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ], get_output=True)
logger.warning("[TRIAGING STEP 2] ... Output end\n")

logger.warning("\n\n[TRIAGING STEP 3] Re-running single emulation run with .cur_input file, showing its output...")
run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ])
run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ], get_output=True)
logger.warning("[TRIAGING STEP 3] ... Output end\n")

return False
Expand Down