fuzzware-fuzzer · smnhff-work · Nov 21, 2023 · Nov 21, 2023 · Mar 25, 2024 · Mar 25, 2024
diff --git a/fuzzware_pipeline/naming_conventions.py b/fuzzware_pipeline/naming_conventions.py
@@ -88,6 +88,14 @@
 MEM_ACCESS_MODE_READ = "r"
 MEM_ACCESS_MODE_WRITE = "w"
 
+# Keys of the 'checkpoints' section of the pipeline config and of each checkpoint entry inside it
+CONFIG_ENTRY_CATEGORY_CHECKPOINTS = 'checkpoints'
+CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED = 'required'  # blocks that must all be visited
+CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED = 'blacklisted'  # accepted as an alternative to 'avoid'
+CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID = 'avoid'  # blocks that must not be visited
+CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET = 'target'  # mandatory: block that marks the checkpoint as reached
+
+# Keys of the 'boot' section of the pipeline config
 CONFIG_ENTRY_CATEGORY_BOOT = 'boot'
 CONFIG_ENTRY_NAME_BOOT_REQUIRED = 'required'
 CONFIG_ENTRY_NAME_BOOT_BLACKLISTED = 'blacklisted'

diff --git a/fuzzware_pipeline/pipeline.py b/fuzzware_pipeline/pipeline.py
@@ -50,6 +50,10 @@ class Pipeline:
     boot_avoided_bbls: set
     groundtruth_valid_basic_blocks: set
     groundtruth_milestone_basic_blocks: set
+    # Checkpoint state: all parsed checkpoints by name, the next checkpoint to reach, and its name
+    checkpoints: dict
+    current_checkpoint: dict
+    current_checkpoint_name: str
 
     # Runtime state
     start_time: int
@@ -201,8 +205,12 @@ def check_emulator_dry(self):
             exit(1)
         logger.info("Emulator dry-run successful!")
         os.remove(dry_input)
-
+    
     def parse_pipeline_yml_config(self, full_config):
+        self.parse_pipeline_boot_config(full_config)  # must run first: it (re)initialises the boot bbl sets
+        self.parse_pipeline_checkpoint_config(full_config)  # may then override the boot sets and booted_bbl
+
+    def parse_pipeline_boot_config(self, full_config):
         self.boot_avoided_bbls = set()
         self.boot_required_bbls = set()
         boot_config = full_config.get(CONFIG_ENTRY_CATEGORY_BOOT)
@@ -220,6 +228,49 @@ def parse_pipeline_yml_config(self, full_config):
             logger.debug("Avoid list: " + " ".join([hex(addr) for addr in self.boot_avoided_bbls]))
             logger.debug("Required: " + " ".join([hex(addr) for addr in self.boot_required_bbls]))
 
+    def parse_pipeline_checkpoint_config(self, full_config):
+        """Parse the optional 'checkpoints' section; the last checkpoint also defines the boot criteria."""
+        checkpoint_config = full_config.get(CONFIG_ENTRY_CATEGORY_CHECKPOINTS)
+        if not checkpoint_config:
+            return
+        # dicts preserve insertion order, so the checkpoints stay in the order of the config
+        self.checkpoints = {
+            name: self.parse_single_checkpoint(entry) for name, entry in checkpoint_config.items()
+        }
+        checkpoint_names = list(self.checkpoints)
+        # The current checkpoint always holds the next checkpoint to reach
+        self.current_checkpoint_name = checkpoint_names[0]
+        self.current_checkpoint = self.checkpoints[checkpoint_names[0]]
+        # The last checkpoint overrides the boot settings wherever it provides a value
+        last_checkpoint = self.checkpoints[checkpoint_names[-1]]
+        if last_checkpoint["required_bbls"]:
+            self.boot_required_bbls = last_checkpoint["required_bbls"]
+        if last_checkpoint["avoided_bbls"]:
+            # Avoided blocks go into boot_avoided_bbls; they must not clobber the required set
+            self.boot_avoided_bbls = last_checkpoint["avoided_bbls"]
+        # Only replace booted_bbl while it still holds the default value
+        if self.booted_bbl == DEFAULT_IDLE_BBL:
+            self.booted_bbl = last_checkpoint["checkpoint_target"]
+
+    def parse_single_checkpoint(self, checkpoint_config):
+        """Turn one checkpoint's config entry into a dict of required/avoided block sets plus its target."""
+        def to_bbl_addrs(values):
+            # Missing or empty entries become an empty set; bit 0 of every address is cleared
+            return {parse_address_value(self.symbols, value) & ~1 for value in values or ()}
+
+        avoid_entries = (
+            checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID)
+            or checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED)
+        )
+        return {
+            "required_bbls": to_bbl_addrs(checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED)),
+            "avoided_bbls": to_bbl_addrs(avoid_entries),
+            # The target is mandatory: a missing entry raises KeyError
+            "checkpoint_target": parse_address_value(
+                self.symbols, checkpoint_config[CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET]
+            ) & ~1,
+        }
+
     def parse_ground_truth_files(self):
         valid_bb_list_path = self.valid_basic_block_list_path
         if os.path.exists(valid_bb_list_path):
@@ -231,6 +282,9 @@ def parse_ground_truth_files(self):
 
     def __init__(self, parent_dir, name, base_inputs, num_main_fuzzer_procs, disable_modeling=False, write_worker_logs=False, do_full_tracing=False, config_name=SESS_FILENAME_CONFIG, timeout_seconds=0, use_aflpp=False):
         self.booted_bbl = DEFAULT_IDLE_BBL
+        self.checkpoints = {}
+        self.current_checkpoint = {}
+        self.current_checkpoint_name = ""
         self.disable_modeling = disable_modeling
         self.shutdown_requested = False
         self.sessions = {}
@@ -430,6 +484,29 @@ def is_successfully_booted(self, bbl_set):
                 (not self.boot_required_bbls - bbl_set)
         )
 
+    def checkpoint_progress(self, bbl_set):
+        """Return True iff bbl_set hits the current checkpoint's target and honours its avoid/required sets."""
+        checkpoint = self.current_checkpoint
+        if not checkpoint or checkpoint["checkpoint_target"] not in bbl_set:
+            # No checkpoint configured, or its target was not reached (always a real bool, never {})
+            return False
+        # No avoided block may have been visited, and no required block may be missing
+        return not (checkpoint["avoided_bbls"] & bbl_set) and not (checkpoint["required_bbls"] - bbl_set)
+
+    def update_checkpoint(self):
+        """Advance current_checkpoint and current_checkpoint_name to the next configured checkpoint, if any."""
+        if not self.checkpoints:
+            # Nothing configured, nothing to advance
+            return
+        ordered_names = list(self.checkpoints)
+        next_index = ordered_names.index(self.current_checkpoint_name) + 1
+        if next_index == len(ordered_names):
+            # Already at the last checkpoint: leave the state untouched
+            return
+        # Move on to the checkpoint that follows the current one
+        self.current_checkpoint_name = ordered_names[next_index]
+        self.current_checkpoint = self.checkpoints[self.current_checkpoint_name]
+
     def choose_next_session_inputs(self, config_map):
         """
         Determines different sets of input file paths, ordered by desirability
@@ -528,15 +605,15 @@ def add_main_session(self, prefix_input_candidate=None):
 
         # Try different sets of inputs in order of quality
         start_success = False
-        for input_path_list in self.choose_next_session_inputs(config_map):
+        input_paths = self.choose_next_session_inputs(config_map)
+        for input_path_list in input_paths:
             # We have previous inputs, carry them over
             logger.debug("Copying over {} inputs".format(len(input_path_list)))
 
             new_sess_inputs_dir = self.curr_main_session.base_input_dir
             os.mkdir(new_sess_inputs_dir)
             for path in input_path_list:
                 shutil.copy2(path, new_sess_inputs_dir)
-
             self.curr_main_session.minimize_inputs(prefix_candidate_path=prefix_input_candidate, is_previously_used_prefix=is_previously_used_prefix)
             # Try the inputs
             if self.curr_main_session.start_fuzzers():
@@ -664,12 +741,21 @@ def handle_queue_forever(self):
                                     logger.info(f"Discovered milestone basic block: 0x{pc:08x}{sym_suffix}")
                                     self.visited_milestone_basic_blocks.add(pc)
                             self.visited_translation_blocks |= new_bbs
-
+                        # if this is hit, we are done with our checkpoints!
                         if (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.is_successfully_booted(bbl_set):
                             logger.info("FOUND MAIN ADDRESS for trace file: '{}'".format(trace_filename))
                             pending_prefix_candidate = input_for_trace_path(trace_file_path)
                             restart_pending = True
                             self.curr_main_session.kill_fuzzers()
+                        # if not, we need to check if we hit one of our checkpoints
+                        elif (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.checkpoint_progress(bbl_set):
+                            # we found our checkpoint and fulfilled avoid/visit conditions
+                            logger.info("FOUND CHECKPOINT {} for trace file: '{}'".format(self.current_checkpoint_name, trace_filename))
+                            # set our current input as prefix and restart
+                            pending_prefix_candidate = input_for_trace_path(trace_file_path)
+                            restart_pending = True
+                            # Do not advance the checkpoint here: the prefix size computation still needs the current one
+                            self.curr_main_session.kill_fuzzers()
 
                         logger.debug("Looking at new MMIO access set")
                         # For every new mmio access trace we get, trigger state generation for unique pc/mmio_addr pairs

diff --git a/fuzzware_pipeline/run_target.py b/fuzzware_pipeline/run_target.py
@@ -23,5 +23,5 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal
         logger.debug("Full command: {}".format(" ".join(arg_list)))
 
     if get_output:
-        return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
+         return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
     return subprocess.call(arg_list, stdout=stdout, stderr=stderr)
diff --git a/fuzzware_pipeline/session.py b/fuzzware_pipeline/session.py
@@ -136,20 +136,27 @@ def start_fuzzer(self, fuzzer_num):
         self.fuzzers.append(fuzzer)
         return fuzzer.start(silent=True)
 
-    def get_booting_prefix_size(self, input_path):
+    def get_progress_prefix_size(self, input_path):
         """
         For an input file located at input_path, find the prefix size required to reach successful boot.
 
         If booting successful, returns the size of the input prefix.
         Otherwise, returns None
         """
-        gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)])
+        checkpoint_target = self.parent.current_checkpoint["checkpoint_target"]
+        gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)])
         bbl_set = set(parse_bbl_set(self.temp_bbl_set_path))
-        if not self.parent.is_successfully_booted(bbl_set):
+        # did we find the last checkpoint?
+        checkpoints_done = self.parent.is_successfully_booted(bbl_set);
+        # did we find our current checkpoint?
+        checkpoint_progress = self.parent.checkpoint_progress(bbl_set);
+        # if neither happened, we do not have an interesting prefix
+        if not (checkpoints_done or checkpoint_progress):
             return None
 
         prefix_size = None
-        for _, _, _, mode, _, access_fuzz_ind, num_consumed_fuzz_bytes, _, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]:
+        # count all the consumptions
+        for evt_id, pc, lr, mode, access_size, access_fuzz_ind, num_consumed_fuzz_bytes, address, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]:
             if mode == "r":
                 prefix_size = access_fuzz_ind + num_consumed_fuzz_bytes
                 break
@@ -161,13 +168,14 @@ def get_booting_prefix_size(self, input_path):
             # Try expanding input and re-running for a number of times
             for _ in range(16):
                 copy_prefix_to(self.temp_prefix_input_path, input_path, prefix_size)
-                gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)])
+                # gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)])
+                checkpoint_target = self.parent.current_checkpoint["checkpoint_target"]
+                gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)])
                 bbl_set = set(parse_bbl_set(self.temp_bbl_set_path))
-
-                if self.parent.is_successfully_booted(bbl_set):
+                # if we are done with our checkpoints or reached our current checkpoint, return the prefix size
+                if self.parent.is_successfully_booted(bbl_set) or self.parent.checkpoint_progress(bbl_set):
                     return prefix_size
                 prefix_size += 1
-
         return None
 
     def emulator_args(self):
@@ -197,25 +205,36 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
 
         # Handle cases where prefix candidate is passed
         if prefix_candidate_path:
-            booting_prefix_size = self.get_booting_prefix_size(prefix_candidate_path)
-            is_booted_successfully = booting_prefix_size is not None
+            # this returns none when we do not have a prefix
+            # if it is not none, the current checkpoint is no longer needed 
+            progress_prefix_size = self.get_progress_prefix_size(prefix_candidate_path)
+            did_some_progress = progress_prefix_size is not None
             if is_previously_used_prefix:
-                if is_booted_successfully:
+                if did_some_progress:
+                    # the old prefix has had its update processed already
+                    # technically, this cannot happen. An old prefix cannot progress 
+                    # further in the checkpoints, can it?
+                    # self.parent.update_checkpoint()
                     # A previously booting prefix still boots.
                     # Set the booting prefix and prepend remainder to input files
-                    self.save_prefix_input(prefix_candidate_path, booting_prefix_size)
-                    prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=booting_prefix_size)
+                    self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
+                    prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=progress_prefix_size)
                 else:
-                    # The input no longer successfully boots the image
+                    # the prefix did not make progress. Still, keep it as we need it to reach the next prefix
                     # Attach the no longer booting prefix to input files and minimize without prefix
+                    self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
                     prepend_to_all(self.base_input_dir, prefix_candidate_path)
             else:
-                if is_booted_successfully:
+                if did_some_progress:
+                    # Update the checkpoint since we are not none
+                    # does not matter if we are the last checkpoint
+                    # in this case, update checkpoint does nothing
+                    self.parent.update_checkpoint()
                     # A brand new booting input was discovered, use it as new input prefix and reset to generic inputs
                     # extract prefix from input, copy over generic base inputs
                     shutil.rmtree(self.base_input_dir)
                     shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir)
-                    self.save_prefix_input(prefix_candidate_path, booting_prefix_size)
+                    self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
                     # No minimization or input corpus adjustment required in this case, return
                     return
         else:
@@ -232,9 +251,9 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
                 self.parent.add_warning_line("Minimization for fuzzing session '{}' had no inputs remaining, copying generic inputs.".format(self.name))
                 shutil.rmtree(self.base_input_dir, True)
                 shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir)
-        except subprocess.CalledProcessError:
+        except subprocess.CalledProcessError as e:
             self.parent.add_warning_line("Minimization for fuzzing session '{}' failed, copying full inputs.".format(self.name))
-
+            
             # In case minimization does not work out, copy all inputs
             shutil.rmtree(self.base_input_dir, True)
             shutil.copytree(self.temp_minimization_dir, self.base_input_dir)
@@ -291,11 +310,11 @@ def start_fuzzers(self):
                 logger.warning("[TRIAGING STEP 1] ... Output end")
 
                 logger.warning("\n\n[TRIAGING STEP 2] Re-running single emulation run, showing its output...")
-                run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ])
+                run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ], get_output=True)
                 logger.warning("[TRIAGING STEP 2] ... Output end\n")
 
                 logger.warning("\n\n[TRIAGING STEP 3] Re-running single emulation run with .cur_input file, showing its output...")
-                run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ])
+                run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ], get_output=True)
                 logger.warning("[TRIAGING STEP 3] ... Output end\n")
 
                 return False