diff --git a/fuzzware_pipeline/naming_conventions.py b/fuzzware_pipeline/naming_conventions.py index 4145c9e..95f83fe 100644 --- a/fuzzware_pipeline/naming_conventions.py +++ b/fuzzware_pipeline/naming_conventions.py @@ -88,6 +88,14 @@ MEM_ACCESS_MODE_READ = "r" MEM_ACCESS_MODE_WRITE = "w" +# the checkpoint naming +CONFIG_ENTRY_CATEGORY_CHECKPOINTS = 'checkpoints' +CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED = 'required' +CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED = 'blacklisted' +CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID = 'avoid' +CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET = 'target' + +# the boot naming CONFIG_ENTRY_CATEGORY_BOOT = 'boot' CONFIG_ENTRY_NAME_BOOT_REQUIRED = 'required' CONFIG_ENTRY_NAME_BOOT_BLACKLISTED = 'blacklisted' diff --git a/fuzzware_pipeline/pipeline.py b/fuzzware_pipeline/pipeline.py index 78cf52f..8e09139 100644 --- a/fuzzware_pipeline/pipeline.py +++ b/fuzzware_pipeline/pipeline.py @@ -50,6 +50,10 @@ class Pipeline: boot_avoided_bbls: set groundtruth_valid_basic_blocks: set groundtruth_milestone_basic_blocks: set + # checkpoint logic + checkpoints: dict + current_checkpoint: dict + current_checkpoint_name: str # Runtime state start_time: int @@ -201,8 +205,12 @@ def check_emulator_dry(self): exit(1) logger.info("Emulator dry-run successful!") os.remove(dry_input) - + def parse_pipeline_yml_config(self, full_config): + self.parse_pipeline_boot_config(full_config) + self.parse_pipeline_checkpoint_config(full_config) + + def parse_pipeline_boot_config(self, full_config): self.boot_avoided_bbls = set() self.boot_required_bbls = set() boot_config = full_config.get(CONFIG_ENTRY_CATEGORY_BOOT) @@ -220,6 +228,49 @@ def parse_pipeline_yml_config(self, full_config): logger.debug("Avoid list: " + " ".join([hex(addr) for addr in self.boot_avoided_bbls])) logger.debug("Required: " + " ".join([hex(addr) for addr in self.boot_required_bbls])) + def parse_pipeline_checkpoint_config(self, full_config): + checkpoint_config = full_config.get(CONFIG_ENTRY_CATEGORY_CHECKPOINTS) + checkpoint_configs = {} + if checkpoint_config: + # this is a list of checkpoint objects + # this is the same order as in the config, as dicts preserve insertion order + for checkpoint_name in checkpoint_config.keys(): + single_checkpoint_parsed = self.parse_single_checkpoint(checkpoint_config[checkpoint_name]) + checkpoint_configs[checkpoint_name] = single_checkpoint_parsed + self.checkpoints = checkpoint_configs + first_checkpoint_key = list(self.checkpoints.keys())[0] + # the current checkpoint always holds the next checkpoint to reach + self.current_checkpoint = checkpoint_configs[first_checkpoint_key] + self.current_checkpoint_name = first_checkpoint_key + # if we have checkpoints, make the last checkpoint the booted_bbl + last_checkpoint_key = list(self.checkpoints.keys())[-1] + last_checkpoint = checkpoint_configs[last_checkpoint_key] + if last_checkpoint["required_bbls"]: + self.boot_required_bbls = last_checkpoint["required_bbls"] + if last_checkpoint["avoided_bbls"]: + self.boot_required_bbls = last_checkpoint["avoided_bbls"] + if self.booted_bbl == DEFAULT_IDLE_BBL: + self.booted_bbl = last_checkpoint["checkpoint_target"] + + def parse_single_checkpoint(self, checkpoint_config): + checkpoint = {} + # this parses a single checkpoint + required_bbls = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED) + if required_bbls: + checkpoint["required_bbls"] = set(map(lambda v: parse_address_value(self.symbols, v)&(~1), required_bbls)) + else: + # without else, the entry is not initialised if missing + checkpoint["required_bbls"] = set() + avoided_bbls = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID) or checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED) + if avoided_bbls: + checkpoint["avoided_bbls"] = set(map(lambda v: parse_address_value(self.symbols, v)&(~1), avoided_bbls)) + else: + # without else, the entry is not initialised if missing + checkpoint["avoided_bbls"] = set() + # this one is mandatory, so no check before + checkpoint["checkpoint_target"] = parse_address_value(self.symbols, checkpoint_config[CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET]) & (~1) + return checkpoint + def parse_ground_truth_files(self): valid_bb_list_path = self.valid_basic_block_list_path if os.path.exists(valid_bb_list_path): @@ -231,6 +282,9 @@ def parse_ground_truth_files(self): def __init__(self, parent_dir, name, base_inputs, num_main_fuzzer_procs, disable_modeling=False, write_worker_logs=False, do_full_tracing=False, config_name=SESS_FILENAME_CONFIG, timeout_seconds=0, use_aflpp=False): self.booted_bbl = DEFAULT_IDLE_BBL + self.checkpoints = {} + self.current_checkpoint = {} + self.current_checkpoint_name = "" self.disable_modeling = disable_modeling self.shutdown_requested = False self.sessions = {} @@ -430,6 +484,29 @@ def is_successfully_booted(self, bbl_set): (not self.boot_required_bbls - bbl_set) ) + # this checks if the currently defined checkpoint is hit + def checkpoint_progress(self, bbl_set): + # did we find our checkpoint? + return self.current_checkpoint and (self.current_checkpoint["checkpoint_target"] in bbl_set) and ( + # And no blacklist addresses found and all whitelist addresses in bbl set + (not self.current_checkpoint["avoided_bbls"] & bbl_set) and \ + (not self.current_checkpoint["required_bbls"] - bbl_set) + ) + + # set all the fields to the next checkpoint + def update_checkpoint(self): + if self.checkpoints: + checkpoint_names = list(self.checkpoints.keys()) + current_index = checkpoint_names.index(self.current_checkpoint_name) + # is this already the last checkpoint? + if (current_index + 1) == len(checkpoint_names): + # then return and do nothing + return + else: + # otherwise update checkpoint name and current checkpoint + self.current_checkpoint_name = checkpoint_names[current_index+1] + self.current_checkpoint = self.checkpoints[self.current_checkpoint_name] + def choose_next_session_inputs(self, config_map): """ Determines different sets of input file paths, ordered by desirability @@ -528,7 +605,8 @@ def add_main_session(self, prefix_input_candidate=None): # Try different sets of inputs in order of quality start_success = False - for input_path_list in self.choose_next_session_inputs(config_map): + input_paths = self.choose_next_session_inputs(config_map) + for input_path_list in input_paths: # We have previous inputs, carry them over logger.debug("Copying over {} inputs".format(len(input_path_list))) @@ -536,7 +614,6 @@ def add_main_session(self, prefix_input_candidate=None): os.mkdir(new_sess_inputs_dir) for path in input_path_list: shutil.copy2(path, new_sess_inputs_dir) - self.curr_main_session.minimize_inputs(prefix_candidate_path=prefix_input_candidate, is_previously_used_prefix=is_previously_used_prefix) # Try the inputs if self.curr_main_session.start_fuzzers(): @@ -664,12 +741,21 @@ def handle_queue_forever(self): logger.info(f"Discovered milestone basic block: 0x{pc:08x}{sym_suffix}") self.visited_milestone_basic_blocks.add(pc) self.visited_translation_blocks |= new_bbs - + # if this is hit, we are done with our checkpoints! if (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.is_successfully_booted(bbl_set): logger.info("FOUND MAIN ADDRESS for trace file: '{}'".format(trace_filename)) pending_prefix_candidate = input_for_trace_path(trace_file_path) restart_pending = True self.curr_main_session.kill_fuzzers() + # if not, we need to check if we hit one of our checkpoints + elif (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.checkpoint_progress(bbl_set): + # we found our checkpoint and fulfilled avoid/visit conditions + logger.info("FOUND CHECKPOINT {} for trace file: '{}'".format(self.current_checkpoint_name, trace_filename)) + # set our current input as prefix and restart + pending_prefix_candidate = input_for_trace_path(trace_file_path) + restart_pending = True + # I think we cannot update our checkpoint here because it is still needed for prefix size computation + self.curr_main_session.kill_fuzzers() logger.debug("Looking at new MMIO access set") # For every new mmio access trace we get, trigger state generation for unique pc/mmio_addr pairs diff --git a/fuzzware_pipeline/run_target.py b/fuzzware_pipeline/run_target.py index 9f5f792..2825a7b 100644 --- a/fuzzware_pipeline/run_target.py +++ b/fuzzware_pipeline/run_target.py @@ -23,5 +23,5 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal logger.debug("Full command: {}".format(" ".join(arg_list))) if get_output: - return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout + return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout return subprocess.call(arg_list, stdout=stdout, stderr=stderr) diff --git a/fuzzware_pipeline/session.py b/fuzzware_pipeline/session.py index 5b79dd9..975549a 100644 --- a/fuzzware_pipeline/session.py +++ b/fuzzware_pipeline/session.py @@ -136,20 +136,27 @@ def start_fuzzer(self, fuzzer_num): self.fuzzers.append(fuzzer) return fuzzer.start(silent=True) - def get_booting_prefix_size(self, input_path): + def get_progress_prefix_size(self, input_path): """ For an input file located at input_path, find the prefix size required to reach successful boot. If booting successful, returns the size of the input prefix. Otherwise, returns None """ - gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)]) + checkpoint_target = self.parent.current_checkpoint["checkpoint_target"] + gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)]) bbl_set = set(parse_bbl_set(self.temp_bbl_set_path)) - if not self.parent.is_successfully_booted(bbl_set): + # did we find the last checkpoint? + checkpoints_done = self.parent.is_successfully_booted(bbl_set); + # did we find our current checkpoint? + checkpoint_progress = self.parent.checkpoint_progress(bbl_set); + # if neither happened, we do not have an interesting prefix + if not (checkpoints_done or checkpoint_progress): return None prefix_size = None - for _, _, _, mode, _, access_fuzz_ind, num_consumed_fuzz_bytes, _, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]: + # count all the consumptions + for evt_id, pc, lr, mode, access_size, access_fuzz_ind, num_consumed_fuzz_bytes, address, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]: if mode == "r": prefix_size = access_fuzz_ind + num_consumed_fuzz_bytes break @@ -161,13 +168,14 @@ def get_booting_prefix_size(self, input_path): # Try expanding input and re-running for a number of times for _ in range(16): copy_prefix_to(self.temp_prefix_input_path, input_path, prefix_size) - gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)]) + # gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)]) + checkpoint_target = self.parent.current_checkpoint["checkpoint_target"] + gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)]) bbl_set = set(parse_bbl_set(self.temp_bbl_set_path)) - - if self.parent.is_successfully_booted(bbl_set): + # if we are done with our checkpoints or reached our current checkpoint, return the prefix size + if self.parent.is_successfully_booted(bbl_set) or self.parent.checkpoint_progress(bbl_set): return prefix_size prefix_size += 1 - return None def emulator_args(self): @@ -197,25 +205,36 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl # Handle cases where prefix candidate is passed if prefix_candidate_path: - booting_prefix_size = self.get_booting_prefix_size(prefix_candidate_path) - is_booted_successfully = booting_prefix_size is not None + # this returns none when we do not have a prefix + # if it is not none, the current checkpoint is no longer needed + progress_prefix_size = self.get_progress_prefix_size(prefix_candidate_path) + did_some_progress = progress_prefix_size is not None if is_previously_used_prefix: - if is_booted_successfully: + if did_some_progress: + # the old prefix has had its update processed already + # technically, this cannot happen. An old prefix cannot progress + # further in the checkpoints, can it? + # self.parent.update_checkpoint() # A previously booting prefix still boots. # Set the booting prefix and prepend remainder to input files - self.save_prefix_input(prefix_candidate_path, booting_prefix_size) - prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=booting_prefix_size) + self.save_prefix_input(prefix_candidate_path, progress_prefix_size) + prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=progress_prefix_size) else: - # The input no longer successfully boots the image + # the prefix did not make progress. Still, keep it as we need it to reach the next prefix # Attach the no longer booting prefix to input files and minimize without prefix + self.save_prefix_input(prefix_candidate_path, progress_prefix_size) prepend_to_all(self.base_input_dir, prefix_candidate_path) else: - if is_booted_successfully: + if did_some_progress: + # Update the checkpoint since we are not none + # does not matter if we are the last checkpoint + # in this case, update checkpoint does nothing + self.parent.update_checkpoint() # A brand new booting input was discovered, use it as new input prefix and reset to generic inputs # extract prefix from input, copy over generic base inputs shutil.rmtree(self.base_input_dir) shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir) - self.save_prefix_input(prefix_candidate_path, booting_prefix_size) + self.save_prefix_input(prefix_candidate_path, progress_prefix_size) # No minimization or input corpus adjustment required in this case, return return else: @@ -232,9 +251,9 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl self.parent.add_warning_line("Minimization for fuzzing session '{}' had no inputs remaining, copying generic inputs.".format(self.name)) shutil.rmtree(self.base_input_dir, True) shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir) - except subprocess.CalledProcessError: + except subprocess.CalledProcessError as e: self.parent.add_warning_line("Minimization for fuzzing session '{}' failed, copying full inputs.".format(self.name)) - + # In case minimization does not work out, copy all inputs shutil.rmtree(self.base_input_dir, True) shutil.copytree(self.temp_minimization_dir, self.base_input_dir) @@ -291,11 +310,11 @@ def start_fuzzers(self): logger.warning("[TRIAGING STEP 1] ... Output end") logger.warning("\n\n[TRIAGING STEP 2] Re-running single emulation run, showing its output...") - run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ]) + run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ], get_output=True) logger.warning("[TRIAGING STEP 2] ... Output end\n") logger.warning("\n\n[TRIAGING STEP 3] Re-running single emulation run with .cur_input file, showing its output...") - run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ]) + run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ], get_output=True) logger.warning("[TRIAGING STEP 3] ... Output end\n") return False