From 949eac91aa52bd125592b42b616fa713bf32bcec Mon Sep 17 00:00:00 2001 From: Simeon Hoffmann Date: Tue, 21 Nov 2023 15:44:22 +0100 Subject: [PATCH 1/6] added -n flag to tracegen --- fuzzware_pipeline/__init__.py | 3 +- fuzzware_pipeline/workers/tracegen.py | 56 +++++++++++++++++++-------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/fuzzware_pipeline/__init__.py b/fuzzware_pipeline/__init__.py index e03c4b7..701ee54 100644 --- a/fuzzware_pipeline/__init__.py +++ b/fuzzware_pipeline/__init__.py @@ -737,7 +737,7 @@ def do_gentraces(args, leftover_args): main_dir = project_main_dirs[main_dir_num-1] print(f"Generating traces for main directory {main_dir}") - gen_missing_maindir_traces(main_dir, required_trace_prefixes, tracedir_postfix=args.tracedir_postfix, log_progress=True, verbose=args.verbose, crashing_inputs=args.crashes) + gen_missing_maindir_traces(main_dir, required_trace_prefixes, tracedir_postfix=args.tracedir_postfix, log_progress=True, verbose=args.verbose, crashing_inputs=args.crashes, num_emulators=args.num_instances) MODE_GENSTATS = 'genstats' STATNAME_COV, STATNAME_MMIO_COSTS, STATNAME_MMIO_OVERHEAD_ELIM = 'coverage', 'modeling-costs', 'mmio-overhead-elim' @@ -1023,6 +1023,7 @@ def do_help(args, leftover_args): parser_gentraces.add_argument('--tracedir-postfix', help="(optional) generate traces in an alternative trace dir. 
If this is specified, an alternative trace dir is created within the fuzzer dir named traces_.", default=None) parser_gentraces.add_argument('--dryrun', action="store_true", default=False, help="Only list the missing trace files, do not generate actual traces.") parser_gentraces.add_argument('-v', '--verbose', action="store_true", default=False, help="Display stdout output of trace generation.") + parser_gentraces.add_argument('-n', '--num-instances', default=1, type=int, help="Number of local emulator instances to use.") # Genstats command-line arguments parser_genstats.add_argument('stats', nargs="*", default=(STATNAME_COV, STATNAME_CRASH_TIMINGS,STATNAME_MMIO_COSTS), help=f"The stats to generate. Options: {','.join(KNOWN_STATNAMES)}. Defaults to '{STATNAME_COV} {STATNAME_CRASH_TIMINGS} {STATNAME_MMIO_COSTS}'.") diff --git a/fuzzware_pipeline/workers/tracegen.py b/fuzzware_pipeline/workers/tracegen.py index 3de9e4d..32ef5a3 100644 --- a/fuzzware_pipeline/workers/tracegen.py +++ b/fuzzware_pipeline/workers/tracegen.py @@ -6,6 +6,7 @@ import time import uuid from pathlib import Path +from multiprocessing import Pool, Value import rq from fuzzware_pipeline.logging_handler import logging_handler @@ -67,7 +68,17 @@ def batch_gen_native_traces(config_path, input_paths, extra_args=None, bbl_set_p gentrace_proc.destroy() -def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=None, tracedir_postfix="", log_progress=False, verbose=False, crashing_inputs=False, force_overwrite=False): +# the number of completed tracegen jobs +# shared over processes, so it needs a lock +num_processed = None + +def init(args): + ''' store the counter for later use ''' + global counter + counter = args + + +def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=None, tracedir_postfix="", log_progress=False, verbose=False, crashing_inputs=False, force_overwrite=False, num_emulators=1): projdir = nc.project_base(maindir) config_path = 
nc.config_file_for_main_path(maindir) extra_args = parse_extra_args(load_extra_args(nc.extra_args_for_config_path(config_path)), projdir) @@ -138,30 +149,41 @@ def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=Non logger.info("No traces to generate for main path") return - num_processed = 0 - start_time = time.time() + # after here, starting time is never written if can_use_native_batch: input_paths, bbl_set_paths, mmio_set_paths, bbl_hash_paths = jobs_for_config batch_gen_native_traces(config_path, input_paths, extra_args, bbl_set_paths, mmio_set_paths, bbl_hash_paths, not verbose) if log_progress: logger.info(f"Generating traces took {time.time() - start_time:.02f} seconds for {len(input_paths)} input(s)") else: - num_processed = 0 + # jobs for config does not have all information we need for a run + # but we want everything in a list for mp.map + args = [] + global num_processed + num_processed = Value('i', 0) for input_path, bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path in jobs_for_config: - gen_traces(str(config_path), str(input_path), - bbl_trace_path=bbl_trace_path, ram_trace_path=ram_trace_path, mmio_trace_path=mmio_trace_path, - bbl_set_path=bbl_set_path, mmio_set_path=mmio_set_path, bbl_hash_path=bbl_hash_path, - extra_args=extra_args, silent=not verbose - ) - num_processed += 1 - - if log_progress: - if num_processed > 0 and num_processed % 50 == 0: - time_passed = round(time.time() - start_time) - relative_done = (num_processed+1) / num_gentrace_jobs - time_estimated = round((relative_done ** (-1)) * time_passed) - logger.info(f"[*] Processed {num_processed}/{num_gentrace_jobs} in {time_passed} seconds. 
Estimated seconds remaining: {time_estimated-time_passed}") + args.append((str(config_path), str(input_path), bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path, extra_args, verbose, start_time, log_progress, num_gentrace_jobs)) + with Pool(num_emulators) as p: + p.map(gen_traces_wrapper, args) + +def gen_traces_wrapper(job): + config_path, input_path, bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path, extra_args, verbose, start_time, log_progress, num_gentrace_jobs = job + gen_traces(str(config_path), str(input_path), + bbl_trace_path=bbl_trace_path, ram_trace_path=ram_trace_path, mmio_trace_path=mmio_trace_path, + bbl_set_path=bbl_set_path, mmio_set_path=mmio_set_path, bbl_hash_path=bbl_hash_path, + extra_args=extra_args, silent=not verbose + ) + global num_processed + with num_processed.get_lock(): + num_processed.value += 1 + if log_progress: + if num_processed.value > 0 and num_processed.value % 50 == 0: + time_passed = round(time.time() - start_time) + relative_done = (num_processed.value+1) / num_gentrace_jobs + time_estimated = round((relative_done ** (-1)) * time_passed) + logger.info(f"[*] Processed {num_processed.value}/{num_gentrace_jobs} in {time_passed} seconds. 
Estimated seconds remaining: {time_estimated-time_passed}") + def gen_all_missing_traces(projdir, trace_name_prefixes=None, log_progress=False, verbose=False, crashing_inputs=False, force_overwrite=False): if trace_name_prefixes is None: From 583bfd54c1c387d19256c8f2acb26da9b8734c08 Mon Sep 17 00:00:00 2001 From: Simeon Hoffmann Date: Tue, 21 Nov 2023 16:30:03 +0100 Subject: [PATCH 2/6] made state sharing more explicit --- fuzzware_pipeline/__init__.py | 2 +- fuzzware_pipeline/workers/tracegen.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fuzzware_pipeline/__init__.py b/fuzzware_pipeline/__init__.py index 701ee54..c8522e6 100644 --- a/fuzzware_pipeline/__init__.py +++ b/fuzzware_pipeline/__init__.py @@ -728,7 +728,7 @@ def do_gentraces(args, leftover_args): if can_use_native_batch: print("[+] Using native batch mode as only natively supported traces are to be generated") else: - print(f"[*] We need non-native traces. Doing this one by one. This could take a while...") + print(f"[*] We need non-native traces. 
This could take a while...") for main_dir_num in main_dir_nums: if main_dir_num > len(project_main_dirs): diff --git a/fuzzware_pipeline/workers/tracegen.py b/fuzzware_pipeline/workers/tracegen.py index 32ef5a3..4c1f5c5 100644 --- a/fuzzware_pipeline/workers/tracegen.py +++ b/fuzzware_pipeline/workers/tracegen.py @@ -74,8 +74,8 @@ def batch_gen_native_traces(config_path, input_paths, extra_args=None, bbl_set_p def init(args): ''' store the counter for later use ''' - global counter - counter = args + global num_processed + num_processed = args def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=None, tracedir_postfix="", log_progress=False, verbose=False, crashing_inputs=False, force_overwrite=False, num_emulators=1): @@ -164,7 +164,7 @@ def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=Non num_processed = Value('i', 0) for input_path, bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path in jobs_for_config: args.append((str(config_path), str(input_path), bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path, extra_args, verbose, start_time, log_progress, num_gentrace_jobs)) - with Pool(num_emulators) as p: + with Pool(num_emulators, init, (num_processed,)) as p: p.map(gen_traces_wrapper, args) def gen_traces_wrapper(job): From 287c44b0651d7c03a75eb4af278fe5dcb1ad9abd Mon Sep 17 00:00:00 2001 From: smnhff <117087063+smnhff@users.noreply.github.com> Date: Mon, 25 Mar 2024 14:03:35 +0000 Subject: [PATCH 3/6] Checkpoints (#1) changed boot config entries to checkpoints --- fuzzware_pipeline/naming_conventions.py | 8 ++ fuzzware_pipeline/pipeline.py | 101 +++++++++++++++++++++++- fuzzware_pipeline/run_target.py | 7 +- fuzzware_pipeline/session.py | 66 +++++++++++----- 4 files changed, 157 insertions(+), 25 deletions(-) diff --git a/fuzzware_pipeline/naming_conventions.py b/fuzzware_pipeline/naming_conventions.py index 4145c9e..95f83fe 100644 
--- a/fuzzware_pipeline/naming_conventions.py +++ b/fuzzware_pipeline/naming_conventions.py @@ -88,6 +88,14 @@ MEM_ACCESS_MODE_READ = "r" MEM_ACCESS_MODE_WRITE = "w" +# the checkpoint naming +CONFIG_ENTRY_CATEGORY_CHECKPOINTS = 'checkpoints' +CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED = 'required' +CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED = 'blacklisted' +CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID = 'avoid' +CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET = 'target' + +# the boot naming CONFIG_ENTRY_CATEGORY_BOOT = 'boot' CONFIG_ENTRY_NAME_BOOT_REQUIRED = 'required' CONFIG_ENTRY_NAME_BOOT_BLACKLISTED = 'blacklisted' diff --git a/fuzzware_pipeline/pipeline.py b/fuzzware_pipeline/pipeline.py index 78cf52f..d860f97 100644 --- a/fuzzware_pipeline/pipeline.py +++ b/fuzzware_pipeline/pipeline.py @@ -50,6 +50,10 @@ class Pipeline: boot_avoided_bbls: set groundtruth_valid_basic_blocks: set groundtruth_milestone_basic_blocks: set + # checkpoint logic + checkpoints: dict + current_checkpoint: dict + current_checkpoint_name: str # Runtime state start_time: int @@ -201,8 +205,12 @@ def check_emulator_dry(self): exit(1) logger.info("Emulator dry-run successful!") os.remove(dry_input) - + def parse_pipeline_yml_config(self, full_config): + self.parse_pipeline_boot_config(full_config) + self.parse_pipeline_checkpoint_config(full_config) + + def parse_pipeline_boot_config(self, full_config): self.boot_avoided_bbls = set() self.boot_required_bbls = set() boot_config = full_config.get(CONFIG_ENTRY_CATEGORY_BOOT) @@ -220,6 +228,49 @@ def parse_pipeline_yml_config(self, full_config): logger.debug("Avoid list: " + " ".join([hex(addr) for addr in self.boot_avoided_bbls])) logger.debug("Required: " + " ".join([hex(addr) for addr in self.boot_required_bbls])) + def parse_pipeline_checkpoint_config(self, full_config): + checkpoint_config = full_config.get(CONFIG_ENTRY_CATEGORY_CHECKPOINTS) + checkpoint_configs = {} + if checkpoint_config: + # this is a list of checkpoint objects + # this is the same order as 
in the config, as dicts preserve insertion order + for checkpoint_name in checkpoint_config.keys(): + single_checkpoint_parsed = self.parse_single_checkpoint(checkpoint_config[checkpoint_name]) + checkpoint_configs[checkpoint_name] = single_checkpoint_parsed + self.checkpoints = checkpoint_configs + first_checkpoint_key = list(self.checkpoints.keys())[0] + # the current checkpoint always holds the next checkpoint to reach + self.current_checkpoint = checkpoint_configs[first_checkpoint_key] + self.current_checkpoint_name = first_checkpoint_key + # if we have checkpoints, make the last checkpoint the booted_bbl + last_checkpoint_key = list(self.checkpoints.keys())[-1] + last_checkpoint = checkpoint_configs[last_checkpoint_key] + if last_checkpoint["required_bbls"]: + self.boot_required_bbls = last_checkpoint["required_bbls"] + if last_checkpoint["avoided_bbls"]: + self.boot_avoided_bbls = last_checkpoint["avoided_bbls"] + if self.booted_bbl == DEFAULT_IDLE_BBL: + self.booted_bbl = last_checkpoint["checkpoint_target"] + + def parse_single_checkpoint(self, checkpoint_config): + checkpoint = {} + # this parses a single checkpoint + required_bbls = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED) + if required_bbls: + checkpoint["required_bbls"] = set(map(lambda v: parse_address_value(self.symbols, v)&(~1), required_bbls)) + else: + # without else, the entry is not initialised if missing + checkpoint["required_bbls"] = set() + avoided_bbls = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID) or checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED) + if avoided_bbls: + checkpoint["avoided_bbls"] = set(map(lambda v: parse_address_value(self.symbols, v)&(~1), avoided_bbls)) + else: + # without else, the entry is not initialised if missing + checkpoint["avoided_bbls"] = set() + # this one is mandatory, so no check before + checkpoint["checkpoint_target"] = parse_address_value(self.symbols, 
checkpoint_config[CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET]) & (~1) + return checkpoint + def parse_ground_truth_files(self): valid_bb_list_path = self.valid_basic_block_list_path if os.path.exists(valid_bb_list_path): @@ -231,6 +282,9 @@ def parse_ground_truth_files(self): def __init__(self, parent_dir, name, base_inputs, num_main_fuzzer_procs, disable_modeling=False, write_worker_logs=False, do_full_tracing=False, config_name=SESS_FILENAME_CONFIG, timeout_seconds=0, use_aflpp=False): self.booted_bbl = DEFAULT_IDLE_BBL + self.checkpoints = {} + self.current_checkpoint = {} + self.current_checkpoint_name = "" self.disable_modeling = disable_modeling self.shutdown_requested = False self.sessions = {} @@ -430,6 +484,30 @@ def is_successfully_booted(self, bbl_set): (not self.boot_required_bbls - bbl_set) ) + # this checks if the currently defined checkpoint is hit + def checkpoint_progress(self, bbl_set): + tmp = self.current_checkpoint["checkpoint_target"] + # did we find our checkpoint? + return self.current_checkpoint and (self.current_checkpoint["checkpoint_target"] in bbl_set) and ( + # And no blacklist addresses found and all whitelist addresses in bbl set + (not self.current_checkpoint["avoided_bbls"] & bbl_set) and \ + (not self.current_checkpoint["required_bbls"] - bbl_set) + ) + + # set all the fields to the next checkpoint + def update_checkpoint(self): + if self.checkpoints: + checkpoint_names = list(self.checkpoints.keys()) + current_index = checkpoint_names.index(self.current_checkpoint_name) + # is this already the last checkpoint? 
+ if (current_index + 1) == len(checkpoint_names): + # then return and do nothing + return + else: + # otherwise update checkpoint name and current checkpoint + self.current_checkpoint_name = checkpoint_names[current_index+1] + self.current_checkpoint = self.checkpoints[self.current_checkpoint_name] + def choose_next_session_inputs(self, config_map): """ Determines different sets of input file paths, ordered by desirability @@ -519,6 +597,7 @@ def add_main_session(self, prefix_input_candidate=None): # Before adding the new session, get the possibly previously used prefix path is_previously_used_prefix = False if self.curr_main_sess_index and self.curr_main_session.prefix_input_path: + logger.debug(f"We have a prefix from the previous session: {self.curr_main_session.prefix_input_path}") is_previously_used_prefix = True prefix_input_candidate = self.curr_main_session.prefix_input_path @@ -528,16 +607,20 @@ def add_main_session(self, prefix_input_candidate=None): # Try different sets of inputs in order of quality start_success = False - for input_path_list in self.choose_next_session_inputs(config_map): + input_paths = self.choose_next_session_inputs(config_map) + for input_path_list in input_paths: # We have previous inputs, carry them over logger.debug("Copying over {} inputs".format(len(input_path_list))) new_sess_inputs_dir = self.curr_main_session.base_input_dir + logger.debug(f"Creating directory {new_sess_inputs_dir}") os.mkdir(new_sess_inputs_dir) for path in input_path_list: + logger.debug(f"Copying {path} to {new_sess_inputs_dir}") shutil.copy2(path, new_sess_inputs_dir) - self.curr_main_session.minimize_inputs(prefix_candidate_path=prefix_input_candidate, is_previously_used_prefix=is_previously_used_prefix) + tmp = os.listdir(self.curr_main_session.base_input_dir) + logger.debug(f"Current base input dir content 4: {tmp}") # Try the inputs if self.curr_main_session.start_fuzzers(): start_success = True @@ -664,12 +747,21 @@ def handle_queue_forever(self): 
logger.info(f"Discovered milestone basic block: 0x{pc:08x}{sym_suffix}") self.visited_milestone_basic_blocks.add(pc) self.visited_translation_blocks |= new_bbs - + # if this is hit, we are done with our checkpoints! if (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.is_successfully_booted(bbl_set): logger.info("FOUND MAIN ADDRESS for trace file: '{}'".format(trace_filename)) pending_prefix_candidate = input_for_trace_path(trace_file_path) restart_pending = True self.curr_main_session.kill_fuzzers() + # if not, we need to check if we hit one of our checkpoints + elif (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.checkpoint_progress(bbl_set): + # we found our checkpoint and fulfilled avoid/visit conditions + logger.info("FOUND CHECKPOINT {} for trace file: '{}'".format(self.current_checkpoint_name, trace_filename)) + # set our current input as prefix and restart + pending_prefix_candidate = input_for_trace_path(trace_file_path) + restart_pending = True + # I think we cannot update our checkpoint here because it is still needed for prefix size computation + self.curr_main_session.kill_fuzzers() logger.debug("Looking at new MMIO access set") # For every new mmio access trace we get, trigger state generation for unique pc/mmio_addr pairs @@ -705,6 +797,7 @@ def handle_queue_forever(self): restart_pending, num_config_updates = False, 0 self.curr_main_session.shutdown() self.add_main_session(pending_prefix_candidate) + logger.debug(f"Updated to checkpoint {self.current_checkpoint_name}") pending_prefix_candidate = None time_latest_new_basic_block = None else: diff --git a/fuzzware_pipeline/run_target.py b/fuzzware_pipeline/run_target.py index 9f5f792..d8ca8b7 100644 --- a/fuzzware_pipeline/run_target.py +++ b/fuzzware_pipeline/run_target.py @@ -23,5 +23,10 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal logger.debug("Full command: {}".format(" 
".join(arg_list))) if get_output: - return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout + out = subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout + err = subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stderr + logger.debug(f"subprocess stdout: {out}") + logger.debug(f"subprocess stderr: {err}") + return out + # return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout return subprocess.call(arg_list, stdout=stdout, stderr=stderr) diff --git a/fuzzware_pipeline/session.py b/fuzzware_pipeline/session.py index 5b79dd9..ed94b0f 100644 --- a/fuzzware_pipeline/session.py +++ b/fuzzware_pipeline/session.py @@ -136,21 +136,30 @@ def start_fuzzer(self, fuzzer_num): self.fuzzers.append(fuzzer) return fuzzer.start(silent=True) - def get_booting_prefix_size(self, input_path): + def get_progress_prefix_size(self, input_path): """ For an input file located at input_path, find the prefix size required to reach successful boot. If booting successful, returns the size of the input prefix. Otherwise, returns None """ - gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)]) + checkpoint_target = self.parent.current_checkpoint["checkpoint_target"] + gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)]) bbl_set = set(parse_bbl_set(self.temp_bbl_set_path)) - if not self.parent.is_successfully_booted(bbl_set): + # did we find the last checkpoint? + checkpoints_done = self.parent.is_successfully_booted(bbl_set); + # did we find our current checkpoint? 
+ checkpoint_progress = self.parent.checkpoint_progress(bbl_set); + # if neither happened, we do not have an interesting prefix + if not (checkpoints_done or checkpoint_progress): return None prefix_size = None - for _, _, _, mode, _, access_fuzz_ind, num_consumed_fuzz_bytes, _, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]: + # count all the consumptions + for evt_id, pc, lr, mode, access_size, access_fuzz_ind, num_consumed_fuzz_bytes, address, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]: if mode == "r": + logger.debug(f"found a memory access with the following properties: \n \ + pc: {pc}, lr: {lr}, access size: {access_size}, access indicator: {access_fuzz_ind}, num consumed bytes {num_consumed_fuzz_bytes}, address: {address}") prefix_size = access_fuzz_ind + num_consumed_fuzz_bytes break @@ -161,13 +170,14 @@ def get_booting_prefix_size(self, input_path): # Try expanding input and re-running for a number of times for _ in range(16): copy_prefix_to(self.temp_prefix_input_path, input_path, prefix_size) - gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)]) + # gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)]) + checkpoint_target = self.parent.current_checkpoint["checkpoint_target"] + gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)]) bbl_set = set(parse_bbl_set(self.temp_bbl_set_path)) - - if self.parent.is_successfully_booted(bbl_set): + # if we are done with our checkpoints or reached our current checkpoint, return the prefix size + if self.parent.is_successfully_booted(bbl_set) or 
self.parent.checkpoint_progress(bbl_set): return prefix_size prefix_size += 1 - return None def emulator_args(self): @@ -197,25 +207,39 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl # Handle cases where prefix candidate is passed if prefix_candidate_path: - booting_prefix_size = self.get_booting_prefix_size(prefix_candidate_path) - is_booted_successfully = booting_prefix_size is not None + # this returns none when we do not have a prefix + # if it is not none, the current checkpoint is no longer needed + progress_prefix_size = self.get_progress_prefix_size(prefix_candidate_path) + did_some_progress = progress_prefix_size is not None if is_previously_used_prefix: - if is_booted_successfully: + if did_some_progress: + # the old prefix has had its update processed already + # technically, this cannot happen. An old prefix cannot progress + # further in the checkpoints, can it? + # self.parent.update_checkpoint() # A previously booting prefix still boots. # Set the booting prefix and prepend remainder to input files - self.save_prefix_input(prefix_candidate_path, booting_prefix_size) - prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=booting_prefix_size) + self.save_prefix_input(prefix_candidate_path, progress_prefix_size) + prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=progress_prefix_size) else: - # The input no longer successfully boots the image + # the prefix did not make progress. 
Still, keep it as we need it to reach the next prefix # Attach the no longer booting prefix to input files and minimize without prefix + self.save_prefix_input(prefix_candidate_path, progress_prefix_size) prepend_to_all(self.base_input_dir, prefix_candidate_path) else: - if is_booted_successfully: + if did_some_progress: + # Update the checkpoint since we are not none + # does not matter if we are the last checkpoint + # in this case, update checkpoint does nothing + self.parent.update_checkpoint() # A brand new booting input was discovered, use it as new input prefix and reset to generic inputs # extract prefix from input, copy over generic base inputs + logger.debug(f"Reached a checkpoint, deleting {self.base_input_dir} and resetting to {self.parent.generic_inputs_dir}") + tmp = os.listdir(self.base_input_dir) + logger.debug(f"Current base input dir content 1: {tmp}") shutil.rmtree(self.base_input_dir) shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir) - self.save_prefix_input(prefix_candidate_path, booting_prefix_size) + self.save_prefix_input(prefix_candidate_path, progress_prefix_size) # No minimization or input corpus adjustment required in this case, return return else: @@ -223,6 +247,7 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl pass # Perform minimization. 
In case an input prefix is used, this is already saved in self.extra_runtime_args + logger.debug(f"Moving {self.base_input_dir} to {self.temp_minimization_dir}") shutil.move(self.base_input_dir, self.temp_minimization_dir) harness_args = self.emulator_args() @@ -230,11 +255,12 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl run_corpus_minimizer(harness_args, self.temp_minimization_dir, self.base_input_dir, silent=silent, use_aflpp=self.parent.use_aflpp) if not os.listdir(self.base_input_dir): self.parent.add_warning_line("Minimization for fuzzing session '{}' had no inputs remaining, copying generic inputs.".format(self.name)) + logger.debug(f"Minimisation did not find a base dir and is copying over the generic dir") shutil.rmtree(self.base_input_dir, True) shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir) - except subprocess.CalledProcessError: + except subprocess.CalledProcessError as e: self.parent.add_warning_line("Minimization for fuzzing session '{}' failed, copying full inputs.".format(self.name)) - + # In case minimization does not work out, copy all inputs shutil.rmtree(self.base_input_dir, True) shutil.copytree(self.temp_minimization_dir, self.base_input_dir) @@ -291,11 +317,11 @@ def start_fuzzers(self): logger.warning("[TRIAGING STEP 1] ... Output end") logger.warning("\n\n[TRIAGING STEP 2] Re-running single emulation run, showing its output...") - run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ]) + run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ], get_output=True) logger.warning("[TRIAGING STEP 2] ... 
Output end\n") logger.warning("\n\n[TRIAGING STEP 3] Re-running single emulation run with .cur_input file, showing its output...") - run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ]) + run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ], get_output=True) logger.warning("[TRIAGING STEP 3] ... Output end\n") return False From a4f871919c17ee45e3676807aa893d06961b6fdb Mon Sep 17 00:00:00 2001 From: Simeon Hoffmann Date: Mon, 25 Mar 2024 15:15:41 +0100 Subject: [PATCH 4/6] removed missed debug statements --- fuzzware_pipeline/pipeline.py | 6 ------ fuzzware_pipeline/run_target.py | 8 +------- fuzzware_pipeline/session.py | 7 ------- 3 files changed, 1 insertion(+), 20 deletions(-) diff --git a/fuzzware_pipeline/pipeline.py b/fuzzware_pipeline/pipeline.py index d860f97..f627527 100644 --- a/fuzzware_pipeline/pipeline.py +++ b/fuzzware_pipeline/pipeline.py @@ -597,7 +597,6 @@ def add_main_session(self, prefix_input_candidate=None): # Before adding the new session, get the possibly previously used prefix path is_previously_used_prefix = False if self.curr_main_sess_index and self.curr_main_session.prefix_input_path: - logger.debug(f"We have a prefix from the previous session: {self.curr_main_session.prefix_input_path}") is_previously_used_prefix = True prefix_input_candidate = self.curr_main_session.prefix_input_path @@ -613,14 +612,10 @@ def add_main_session(self, prefix_input_candidate=None): logger.debug("Copying over {} inputs".format(len(input_path_list))) new_sess_inputs_dir = self.curr_main_session.base_input_dir - logger.debug(f"Creating directory {new_sess_inputs_dir}") os.mkdir(new_sess_inputs_dir) for path in input_path_list: - logger.debug(f"Copying {path} to {new_sess_inputs_dir}") shutil.copy2(path, new_sess_inputs_dir) self.curr_main_session.minimize_inputs(prefix_candidate_path=prefix_input_candidate, 
is_previously_used_prefix=is_previously_used_prefix) - tmp = os.listdir(self.curr_main_session.base_input_dir) - logger.debug(f"Current base input dir content 4: {tmp}") # Try the inputs if self.curr_main_session.start_fuzzers(): start_success = True @@ -797,7 +792,6 @@ def handle_queue_forever(self): restart_pending, num_config_updates = False, 0 self.curr_main_session.shutdown() self.add_main_session(pending_prefix_candidate) - logger.debug(f"Updated to checkpoint {self.current_checkpoint_name}") pending_prefix_candidate = None time_latest_new_basic_block = None else: diff --git a/fuzzware_pipeline/run_target.py b/fuzzware_pipeline/run_target.py index d8ca8b7..538e8de 100644 --- a/fuzzware_pipeline/run_target.py +++ b/fuzzware_pipeline/run_target.py @@ -23,10 +23,4 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal logger.debug("Full command: {}".format(" ".join(arg_list))) if get_output: - out = subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout - err = subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stderr - logger.debug(f"subprocess stdout: {out}") - logger.debug(f"subprocess stderr: {err}") - return out - # return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout - return subprocess.call(arg_list, stdout=stdout, stderr=stderr) + return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout diff --git a/fuzzware_pipeline/session.py b/fuzzware_pipeline/session.py index ed94b0f..975549a 100644 --- a/fuzzware_pipeline/session.py +++ b/fuzzware_pipeline/session.py @@ -158,8 +158,6 @@ def get_progress_prefix_size(self, input_path): # count all the consumptions for evt_id, pc, lr, mode, access_size, access_fuzz_ind, num_consumed_fuzz_bytes, address, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]: if mode == "r": - logger.debug(f"found a memory access with the following properties: \n \ - pc: {pc}, lr: {lr}, access size: {access_size}, access indicator: 
{access_fuzz_ind}, num consumed bytes {num_consumed_fuzz_bytes}, address: {address}") prefix_size = access_fuzz_ind + num_consumed_fuzz_bytes break @@ -234,9 +232,6 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl self.parent.update_checkpoint() # A brand new booting input was discovered, use it as new input prefix and reset to generic inputs # extract prefix from input, copy over generic base inputs - logger.debug(f"Reached a checkpoint, deleting {self.base_input_dir} and resetting to {self.parent.generic_inputs_dir}") - tmp = os.listdir(self.base_input_dir) - logger.debug(f"Current base input dir content 1: {tmp}") shutil.rmtree(self.base_input_dir) shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir) self.save_prefix_input(prefix_candidate_path, progress_prefix_size) @@ -247,7 +242,6 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl pass # Perform minimization. In case an input prefix is used, this is already saved in self.extra_runtime_args - logger.debug(f"Moving {self.base_input_dir} to {self.temp_minimization_dir}") shutil.move(self.base_input_dir, self.temp_minimization_dir) harness_args = self.emulator_args() @@ -255,7 +249,6 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl run_corpus_minimizer(harness_args, self.temp_minimization_dir, self.base_input_dir, silent=silent, use_aflpp=self.parent.use_aflpp) if not os.listdir(self.base_input_dir): self.parent.add_warning_line("Minimization for fuzzing session '{}' had no inputs remaining, copying generic inputs.".format(self.name)) - logger.debug(f"Minimisation did not find a base dir and is copying over the generic dir") shutil.rmtree(self.base_input_dir, True) shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir) except subprocess.CalledProcessError as e: From e0fbf52cbb3b25462454eece3a96beed22c2c807 Mon Sep 17 00:00:00 2001 From: Simeon Hoffmann Date: Mon, 25 Mar 2024 
15:56:56 +0100 Subject: [PATCH 5/6] added wrongly removed statement back --- fuzzware_pipeline/run_target.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fuzzware_pipeline/run_target.py b/fuzzware_pipeline/run_target.py index 538e8de..2825a7b 100644 --- a/fuzzware_pipeline/run_target.py +++ b/fuzzware_pipeline/run_target.py @@ -24,3 +24,4 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal if get_output: return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout + return subprocess.call(arg_list, stdout=stdout, stderr=stderr) From 0d38717f56c0f8d85864704b6d473b8b5c632d65 Mon Sep 17 00:00:00 2001 From: smnhff <117087063+smnhff@users.noreply.github.com> Date: Mon, 25 Mar 2024 15:10:26 +0000 Subject: [PATCH 6/6] small bugfix (#2) --- fuzzware_pipeline/pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fuzzware_pipeline/pipeline.py b/fuzzware_pipeline/pipeline.py index f627527..8e09139 100644 --- a/fuzzware_pipeline/pipeline.py +++ b/fuzzware_pipeline/pipeline.py @@ -486,7 +486,6 @@ def is_successfully_booted(self, bbl_set): # this checks if the currently defined checkpoint is hit def checkpoint_progress(self, bbl_set): - tmp = self.current_checkpoint["checkpoint_target"] # did we find our checkpoint? return self.current_checkpoint and (self.current_checkpoint["checkpoint_target"] in bbl_set) and ( # And no blacklist addresses found and all whitelist addresses in bbl set