From 949eac91aa52bd125592b42b616fa713bf32bcec Mon Sep 17 00:00:00 2001
From: Simeon Hoffmann <simeon.hoffmann@cispa.de>
Date: Tue, 21 Nov 2023 15:44:22 +0100
Subject: [PATCH 1/6] added -n flag to tracegen

---
 fuzzware_pipeline/__init__.py         |  3 +-
 fuzzware_pipeline/workers/tracegen.py | 56 +++++++++++++++++++--------
 2 files changed, 41 insertions(+), 18 deletions(-)
diff --git a/fuzzware_pipeline/__init__.py b/fuzzware_pipeline/__init__.py
index e03c4b7..701ee54 100644
--- a/fuzzware_pipeline/__init__.py
+++ b/fuzzware_pipeline/__init__.py
@@ -737,7 +737,7 @@ def do_gentraces(args, leftover_args):
         main_dir = project_main_dirs[main_dir_num-1]
 
         print(f"Generating traces for main directory {main_dir}")
-        gen_missing_maindir_traces(main_dir, required_trace_prefixes, tracedir_postfix=args.tracedir_postfix, log_progress=True, verbose=args.verbose, crashing_inputs=args.crashes)
+        gen_missing_maindir_traces(main_dir, required_trace_prefixes, tracedir_postfix=args.tracedir_postfix, log_progress=True, verbose=args.verbose, crashing_inputs=args.crashes, num_emulators=args.num_instances)
 
 MODE_GENSTATS = 'genstats'
 STATNAME_COV, STATNAME_MMIO_COSTS, STATNAME_MMIO_OVERHEAD_ELIM = 'coverage', 'modeling-costs', 'mmio-overhead-elim'
@@ -1023,6 +1023,7 @@ def do_help(args, leftover_args):
     parser_gentraces.add_argument('--tracedir-postfix', help="(optional) generate traces in an alternative trace dir. If this is specified, an alternative trace dir is created within the fuzzer dir named traces_<tracedir-postfix>.", default=None)
     parser_gentraces.add_argument('--dryrun', action="store_true", default=False, help="Only list the missing trace files, do not generate actual traces.")
     parser_gentraces.add_argument('-v', '--verbose', action="store_true", default=False, help="Display stdout output of trace generation.")
+    parser_gentraces.add_argument('-n', '--num-instances', default=1, type=int, help="Number of local emulator instances to use.")
 
     # Genstats command-line arguments
     parser_genstats.add_argument('stats', nargs="*", default=(STATNAME_COV, STATNAME_CRASH_TIMINGS,STATNAME_MMIO_COSTS), help=f"The stats to generate. Options: {','.join(KNOWN_STATNAMES)}. Defaults to '{STATNAME_COV} {STATNAME_CRASH_TIMINGS} {STATNAME_MMIO_COSTS}'.")
diff --git a/fuzzware_pipeline/workers/tracegen.py b/fuzzware_pipeline/workers/tracegen.py
index 3de9e4d..32ef5a3 100644
--- a/fuzzware_pipeline/workers/tracegen.py
+++ b/fuzzware_pipeline/workers/tracegen.py
@@ -6,6 +6,7 @@
 import time
 import uuid
 from pathlib import Path
+from multiprocessing import Pool, Value
 
 import rq
 from fuzzware_pipeline.logging_handler import logging_handler
@@ -67,7 +68,17 @@ def batch_gen_native_traces(config_path, input_paths, extra_args=None, bbl_set_p
 
     gentrace_proc.destroy()
 
-def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=None, tracedir_postfix="", log_progress=False, verbose=False, crashing_inputs=False, force_overwrite=False):
+# the number of completed tracegen jobs
+# shared over processes, so it needs a lock
+num_processed = None
+
+def init(args):
+    ''' store the counter for later use '''
+    global counter
+    counter = args
+
+
+def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=None, tracedir_postfix="", log_progress=False, verbose=False, crashing_inputs=False, force_overwrite=False, num_emulators=1):
     projdir = nc.project_base(maindir)
     config_path = nc.config_file_for_main_path(maindir)
     extra_args = parse_extra_args(load_extra_args(nc.extra_args_for_config_path(config_path)), projdir)
@@ -138,30 +149,41 @@ def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=Non
             logger.info("No traces to generate for main path")
         return
 
-    num_processed = 0
-
     start_time = time.time()
+    # after here, starting time is never written
     if can_use_native_batch:
         input_paths, bbl_set_paths, mmio_set_paths, bbl_hash_paths = jobs_for_config
         batch_gen_native_traces(config_path, input_paths, extra_args, bbl_set_paths, mmio_set_paths, bbl_hash_paths, not verbose)
         if log_progress:
             logger.info(f"Generating traces took {time.time() - start_time:.02f} seconds for {len(input_paths)} input(s)")
     else:
-        num_processed = 0
+        # jobs for config does not have all information we need for a run
+        # but we want everything in a list for mp.map
+        args = []
+        global num_processed 
+        num_processed = Value('i', 0)
         for input_path, bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path in jobs_for_config:
-            gen_traces(str(config_path), str(input_path),
-                bbl_trace_path=bbl_trace_path, ram_trace_path=ram_trace_path, mmio_trace_path=mmio_trace_path,
-                bbl_set_path=bbl_set_path, mmio_set_path=mmio_set_path, bbl_hash_path=bbl_hash_path,
-                extra_args=extra_args, silent=not verbose
-            )
-            num_processed += 1
-
-            if log_progress:
-                if num_processed > 0 and num_processed % 50 == 0:
-                    time_passed = round(time.time() - start_time)
-                    relative_done = (num_processed+1) / num_gentrace_jobs
-                    time_estimated = round((relative_done ** (-1)) * time_passed)
-                    logger.info(f"[*] Processed {num_processed}/{num_gentrace_jobs} in {time_passed} seconds. Estimated seconds remaining: {time_estimated-time_passed}")
+            args.append((str(config_path), str(input_path), bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path, extra_args, verbose, start_time, log_progress, num_gentrace_jobs))
+        with Pool(num_emulators) as p:
+            p.map(gen_traces_wrapper, args)
+
+def gen_traces_wrapper(job):
+    config_path, input_path, bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path, extra_args, verbose, start_time, log_progress, num_gentrace_jobs = job
+    gen_traces(str(config_path), str(input_path),
+        bbl_trace_path=bbl_trace_path, ram_trace_path=ram_trace_path, mmio_trace_path=mmio_trace_path,
+        bbl_set_path=bbl_set_path, mmio_set_path=mmio_set_path, bbl_hash_path=bbl_hash_path,
+        extra_args=extra_args, silent=not verbose
+    )
+    # module-level shared counter; only its .value is mutated, so no `global` statement is needed
+    with num_processed.get_lock():
+        num_processed.value += 1
+        if log_progress:
+            if num_processed.value > 0 and num_processed.value % 50 == 0:
+                time_passed = round(time.time() - start_time)
+                relative_done = num_processed.value / num_gentrace_jobs
+                time_estimated = round((relative_done ** (-1)) * time_passed)
+                logger.info(f"[*] Processed {num_processed.value}/{num_gentrace_jobs} in {time_passed} seconds. Estimated seconds remaining: {time_estimated-time_passed}")
+
 
 def gen_all_missing_traces(projdir, trace_name_prefixes=None, log_progress=False, verbose=False, crashing_inputs=False, force_overwrite=False):
     if trace_name_prefixes is None:

From 583bfd54c1c387d19256c8f2acb26da9b8734c08 Mon Sep 17 00:00:00 2001
From: Simeon Hoffmann <simeon.hoffmann@cispa.de>
Date: Tue, 21 Nov 2023 16:30:03 +0100
Subject: [PATCH 2/6] made state sharing more explicit

---
 fuzzware_pipeline/__init__.py         | 2 +-
 fuzzware_pipeline/workers/tracegen.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fuzzware_pipeline/__init__.py b/fuzzware_pipeline/__init__.py
index 701ee54..c8522e6 100644
--- a/fuzzware_pipeline/__init__.py
+++ b/fuzzware_pipeline/__init__.py
@@ -728,7 +728,7 @@ def do_gentraces(args, leftover_args):
     if can_use_native_batch:
         print("[+] Using native batch mode as only natively supported traces are to be generated")
     else:
-        print(f"[*] We need non-native traces. Doing this one by one. This could take a while...")
+        print(f"[*] We need non-native traces. This could take a while...")
 
     for main_dir_num in main_dir_nums:
         if main_dir_num > len(project_main_dirs):
diff --git a/fuzzware_pipeline/workers/tracegen.py b/fuzzware_pipeline/workers/tracegen.py
index 32ef5a3..4c1f5c5 100644
--- a/fuzzware_pipeline/workers/tracegen.py
+++ b/fuzzware_pipeline/workers/tracegen.py
@@ -74,8 +74,8 @@ def batch_gen_native_traces(config_path, input_paths, extra_args=None, bbl_set_p
 
 def init(args):
     ''' store the counter for later use '''
-    global counter
-    counter = args
+    global num_processed 
+    num_processed = args
 
 
 def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=None, tracedir_postfix="", log_progress=False, verbose=False, crashing_inputs=False, force_overwrite=False, num_emulators=1):
@@ -164,7 +164,7 @@ def gen_missing_maindir_traces(maindir, required_trace_prefixes, fuzzer_nums=Non
         num_processed = Value('i', 0)
         for input_path, bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path in jobs_for_config:
             args.append((str(config_path), str(input_path), bbl_trace_path, ram_trace_path, mmio_trace_path, bbl_set_path, mmio_set_path, bbl_hash_path, extra_args, verbose, start_time, log_progress, num_gentrace_jobs))
-        with Pool(num_emulators) as p:
+        with Pool(num_emulators, init, (num_processed,)) as p:
             p.map(gen_traces_wrapper, args)
 
 def gen_traces_wrapper(job):

From 287c44b0651d7c03a75eb4af278fe5dcb1ad9abd Mon Sep 17 00:00:00 2001
From: smnhff <117087063+smnhff@users.noreply.github.com>
Date: Mon, 25 Mar 2024 14:03:35 +0000
Subject: [PATCH 3/6] Checkpoints (#1)

changed boot config entries to checkpoints
---
 fuzzware_pipeline/naming_conventions.py |   8 ++
 fuzzware_pipeline/pipeline.py           | 101 +++++++++++++++++++++++-
 fuzzware_pipeline/run_target.py         |   7 +-
 fuzzware_pipeline/session.py            |  66 +++++++++++-----
 4 files changed, 157 insertions(+), 25 deletions(-)

diff --git a/fuzzware_pipeline/naming_conventions.py b/fuzzware_pipeline/naming_conventions.py
index 4145c9e..95f83fe 100644
--- a/fuzzware_pipeline/naming_conventions.py
+++ b/fuzzware_pipeline/naming_conventions.py
@@ -88,6 +88,14 @@
 MEM_ACCESS_MODE_READ = "r"
 MEM_ACCESS_MODE_WRITE = "w"
 
+# the checkpoint naming
+CONFIG_ENTRY_CATEGORY_CHECKPOINTS = 'checkpoints'
+CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED = 'required'
+CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED = 'blacklisted'
+CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID = 'avoid'
+CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET = 'target'
+
+# the boot naming
 CONFIG_ENTRY_CATEGORY_BOOT = 'boot'
 CONFIG_ENTRY_NAME_BOOT_REQUIRED = 'required'
 CONFIG_ENTRY_NAME_BOOT_BLACKLISTED = 'blacklisted'
diff --git a/fuzzware_pipeline/pipeline.py b/fuzzware_pipeline/pipeline.py
index 78cf52f..d860f97 100644
--- a/fuzzware_pipeline/pipeline.py
+++ b/fuzzware_pipeline/pipeline.py
@@ -50,6 +50,10 @@ class Pipeline:
     boot_avoided_bbls: set
     groundtruth_valid_basic_blocks: set
     groundtruth_milestone_basic_blocks: set
+    # checkpoint logic
+    checkpoints: dict
+    current_checkpoint: dict
+    current_checkpoint_name: str
 
     # Runtime state
     start_time: int
@@ -201,8 +205,12 @@ def check_emulator_dry(self):
             exit(1)
         logger.info("Emulator dry-run successful!")
         os.remove(dry_input)
-
+    
     def parse_pipeline_yml_config(self, full_config):
+        self.parse_pipeline_boot_config(full_config)
+        self.parse_pipeline_checkpoint_config(full_config)
+
+    def parse_pipeline_boot_config(self, full_config):
         self.boot_avoided_bbls = set()
         self.boot_required_bbls = set()
         boot_config = full_config.get(CONFIG_ENTRY_CATEGORY_BOOT)
@@ -220,6 +228,49 @@ def parse_pipeline_yml_config(self, full_config):
             logger.debug("Avoid list: " + " ".join([hex(addr) for addr in self.boot_avoided_bbls]))
             logger.debug("Required: " + " ".join([hex(addr) for addr in self.boot_required_bbls]))
 
+    def parse_pipeline_checkpoint_config(self, full_config):
+        checkpoint_config = full_config.get(CONFIG_ENTRY_CATEGORY_CHECKPOINTS)
+        checkpoint_configs = {}
+        if checkpoint_config:
+            # this maps each checkpoint name to its raw config entry
+            # this is the same order as in the config, as dicts preserve insertion order
+            for checkpoint_name in checkpoint_config.keys():
+                single_checkpoint_parsed = self.parse_single_checkpoint(checkpoint_config[checkpoint_name])
+                checkpoint_configs[checkpoint_name] = single_checkpoint_parsed
+            self.checkpoints = checkpoint_configs
+            first_checkpoint_key = list(self.checkpoints.keys())[0]
+            # the current checkpoint always holds the next checkpoint to reach
+            self.current_checkpoint = checkpoint_configs[first_checkpoint_key]
+            self.current_checkpoint_name = first_checkpoint_key
+            # if we have checkpoints, make the last checkpoint the booted_bbl
+            last_checkpoint_key = list(self.checkpoints.keys())[-1]
+            last_checkpoint = checkpoint_configs[last_checkpoint_key]
+            if last_checkpoint["required_bbls"]:
+                self.boot_required_bbls = last_checkpoint["required_bbls"]
+            if last_checkpoint["avoided_bbls"]:
+                self.boot_avoided_bbls = last_checkpoint["avoided_bbls"]
+            if self.booted_bbl == DEFAULT_IDLE_BBL:
+                self.booted_bbl = last_checkpoint["checkpoint_target"]
+
+    def parse_single_checkpoint(self, checkpoint_config):
+        checkpoint = {}
+        # this parses a single checkpoint
+        required_bbls = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_REQUIRED)
+        if required_bbls:
+            checkpoint["required_bbls"] = set(map(lambda v: parse_address_value(self.symbols, v)&(~1), required_bbls))
+        else:
+            # without else, the entry is not initialised if missing
+            checkpoint["required_bbls"] = set()
+        avoided_bbls = checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_AVOID) or checkpoint_config.get(CONFIG_ENTRY_NAME_CHECKPOINTS_BLACKLISTED)
+        if avoided_bbls:
+            checkpoint["avoided_bbls"] = set(map(lambda v: parse_address_value(self.symbols, v)&(~1), avoided_bbls))
+        else:
+            # without else, the entry is not initialised if missing
+            checkpoint["avoided_bbls"] = set()
+        # this one is mandatory, so no check before
+        checkpoint["checkpoint_target"] = parse_address_value(self.symbols, checkpoint_config[CONFIG_ENTRY_NAME_CHECKPOINTS_TARGET]) & (~1)
+        return checkpoint
+
     def parse_ground_truth_files(self):
         valid_bb_list_path = self.valid_basic_block_list_path
         if os.path.exists(valid_bb_list_path):
@@ -231,6 +282,9 @@ def parse_ground_truth_files(self):
 
     def __init__(self, parent_dir, name, base_inputs, num_main_fuzzer_procs, disable_modeling=False, write_worker_logs=False, do_full_tracing=False, config_name=SESS_FILENAME_CONFIG, timeout_seconds=0, use_aflpp=False):
         self.booted_bbl = DEFAULT_IDLE_BBL
+        self.checkpoints = {}
+        self.current_checkpoint = {}
+        self.current_checkpoint_name = ""
         self.disable_modeling = disable_modeling
         self.shutdown_requested = False
         self.sessions = {}
@@ -430,6 +484,30 @@ def is_successfully_booted(self, bbl_set):
                 (not self.boot_required_bbls - bbl_set)
         )
 
+    # this checks if the currently defined checkpoint is hit
+    def checkpoint_progress(self, bbl_set):
+        # do not index current_checkpoint up front: it is {} when no checkpoints are configured
+        # did we find our checkpoint?
+        return self.current_checkpoint and (self.current_checkpoint["checkpoint_target"] in bbl_set) and (
+            # And no blacklist addresses found and all whitelist addresses in bbl set
+            (not self.current_checkpoint["avoided_bbls"] & bbl_set) and \
+                (not self.current_checkpoint["required_bbls"] - bbl_set)
+        )
+
+    # set all the fields to the next checkpoint
+    def update_checkpoint(self):
+        if self.checkpoints:
+            checkpoint_names = list(self.checkpoints.keys())
+            current_index = checkpoint_names.index(self.current_checkpoint_name)
+            # is this already the last checkpoint?
+            if (current_index + 1) == len(checkpoint_names):
+                # then return and do nothing
+                return
+            else:
+                # otherwise update checkpoint name and current checkpoint
+                self.current_checkpoint_name = checkpoint_names[current_index+1]
+                self.current_checkpoint = self.checkpoints[self.current_checkpoint_name]
+
     def choose_next_session_inputs(self, config_map):
         """
         Determines different sets of input file paths, ordered by desirability
@@ -519,6 +597,7 @@ def add_main_session(self, prefix_input_candidate=None):
         # Before adding the new session, get the possibly previously used prefix path
         is_previously_used_prefix = False
         if self.curr_main_sess_index and self.curr_main_session.prefix_input_path:
+            logger.debug(f"We have a prefix from the previous session: {self.curr_main_session.prefix_input_path}")
             is_previously_used_prefix = True
             prefix_input_candidate = self.curr_main_session.prefix_input_path
 
@@ -528,16 +607,20 @@ def add_main_session(self, prefix_input_candidate=None):
 
         # Try different sets of inputs in order of quality
         start_success = False
-        for input_path_list in self.choose_next_session_inputs(config_map):
+        input_paths = self.choose_next_session_inputs(config_map)
+        for input_path_list in input_paths:
             # We have previous inputs, carry them over
             logger.debug("Copying over {} inputs".format(len(input_path_list)))
 
             new_sess_inputs_dir = self.curr_main_session.base_input_dir
+            logger.debug(f"Creating directory {new_sess_inputs_dir}")
             os.mkdir(new_sess_inputs_dir)
             for path in input_path_list:
+                logger.debug(f"Copying {path} to {new_sess_inputs_dir}")
                 shutil.copy2(path, new_sess_inputs_dir)
-
             self.curr_main_session.minimize_inputs(prefix_candidate_path=prefix_input_candidate, is_previously_used_prefix=is_previously_used_prefix)
+            tmp = os.listdir(self.curr_main_session.base_input_dir)
+            logger.debug(f"Current base input dir content 4: {tmp}")
             # Try the inputs
             if self.curr_main_session.start_fuzzers():
                 start_success = True
@@ -664,12 +747,21 @@ def handle_queue_forever(self):
                                     logger.info(f"Discovered milestone basic block: 0x{pc:08x}{sym_suffix}")
                                     self.visited_milestone_basic_blocks.add(pc)
                             self.visited_translation_blocks |= new_bbs
-
+                        # if this is hit, we are done with our checkpoints!
                         if (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.is_successfully_booted(bbl_set):
                             logger.info("FOUND MAIN ADDRESS for trace file: '{}'".format(trace_filename))
                             pending_prefix_candidate = input_for_trace_path(trace_file_path)
                             restart_pending = True
                             self.curr_main_session.kill_fuzzers()
+                        # if not, we need to check if we hit one of our checkpoints
+                        elif (not (self.curr_main_session.prefix_input_path or pending_prefix_candidate)) and self.checkpoint_progress(bbl_set):
+                            # we found our checkpoint and fulfilled avoid/visit conditions
+                            logger.info("FOUND CHECKPOINT {} for trace file: '{}'".format(self.current_checkpoint_name, trace_filename))
+                            # set our current input as prefix and restart
+                            pending_prefix_candidate = input_for_trace_path(trace_file_path)
+                            restart_pending = True
+                            # I think we cannot update our checkpoint here because it is still needed for prefix size computation 
+                            self.curr_main_session.kill_fuzzers()
 
                         logger.debug("Looking at new MMIO access set")
                         # For every new mmio access trace we get, trigger state generation for unique pc/mmio_addr pairs
@@ -705,6 +797,7 @@ def handle_queue_forever(self):
                             restart_pending, num_config_updates = False, 0
                             self.curr_main_session.shutdown()
                             self.add_main_session(pending_prefix_candidate)
+                            logger.debug(f"Updated to checkpoint {self.current_checkpoint_name}")
                             pending_prefix_candidate = None
                             time_latest_new_basic_block = None
                         else:
diff --git a/fuzzware_pipeline/run_target.py b/fuzzware_pipeline/run_target.py
index 9f5f792..d8ca8b7 100644
--- a/fuzzware_pipeline/run_target.py
+++ b/fuzzware_pipeline/run_target.py
@@ -23,5 +23,10 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal
         logger.debug("Full command: {}".format(" ".join(arg_list)))
 
     if get_output:
-        return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
+        out = subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
+        err = subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stderr
+        logger.debug(f"subprocess stdout: {out}")
+        logger.debug(f"subprocess stderr: {err}")
+        return out
+    #     return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
     return subprocess.call(arg_list, stdout=stdout, stderr=stderr)
diff --git a/fuzzware_pipeline/session.py b/fuzzware_pipeline/session.py
index 5b79dd9..ed94b0f 100644
--- a/fuzzware_pipeline/session.py
+++ b/fuzzware_pipeline/session.py
@@ -136,21 +136,30 @@ def start_fuzzer(self, fuzzer_num):
         self.fuzzers.append(fuzzer)
         return fuzzer.start(silent=True)
 
-    def get_booting_prefix_size(self, input_path):
+    def get_progress_prefix_size(self, input_path):
         """
         For an input file located at input_path, find the prefix size required to reach successful boot.
 
         If booting successful, returns the size of the input prefix.
         Otherwise, returns None
         """
-        gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)])
+        checkpoint_target = self.parent.current_checkpoint.get("checkpoint_target", self.parent.booted_bbl)
+        gen_traces(self.config_path, input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)])
         bbl_set = set(parse_bbl_set(self.temp_bbl_set_path))
-        if not self.parent.is_successfully_booted(bbl_set):
+        # did we find the last checkpoint?
+        checkpoints_done = self.parent.is_successfully_booted(bbl_set)
+        # did we find our current checkpoint? (never the case when no checkpoints are configured)
+        checkpoint_progress = bool(self.parent.current_checkpoint) and self.parent.checkpoint_progress(bbl_set)
+        # if neither happened, we do not have an interesting prefix
+        if not (checkpoints_done or checkpoint_progress):
             return None
 
         prefix_size = None
-        for _, _, _, mode, _, access_fuzz_ind, num_consumed_fuzz_bytes, _, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]:
+        # count all the consumptions
+        for evt_id, pc, lr, mode, access_size, access_fuzz_ind, num_consumed_fuzz_bytes, address, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]:
             if mode == "r":
+                logger.debug(f"found a memory access with the following properties: \n \
+                        pc: {pc}, lr: {lr}, access size: {access_size}, access indicator: {access_fuzz_ind}, num consumed bytes {num_consumed_fuzz_bytes}, address: {address}")
                 prefix_size = access_fuzz_ind + num_consumed_fuzz_bytes
                 break
 
@@ -161,13 +170,14 @@ def get_booting_prefix_size(self, input_path):
             # Try expanding input and re-running for a number of times
             for _ in range(16):
                 copy_prefix_to(self.temp_prefix_input_path, input_path, prefix_size)
-                gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(self.parent.booted_bbl)])
+                # fall back to the boot address when no checkpoints are configured (current_checkpoint is {})
+                checkpoint_target = self.parent.current_checkpoint.get("checkpoint_target", self.parent.booted_bbl)
+                gen_traces(self.config_path, self.temp_prefix_input_path, mmio_trace_path=self.temp_mmio_trace_path, bbl_set_path=self.temp_bbl_set_path, extra_args=["--exit-at", "0x{:x}".format(checkpoint_target)])
                 bbl_set = set(parse_bbl_set(self.temp_bbl_set_path))
-
-                if self.parent.is_successfully_booted(bbl_set):
+                # if we are done with our checkpoints or reached our current checkpoint, return the prefix size
+                if self.parent.is_successfully_booted(bbl_set) or (self.parent.current_checkpoint and self.parent.checkpoint_progress(bbl_set)):
                     return prefix_size
                 prefix_size += 1
-
         return None
 
     def emulator_args(self):
@@ -197,25 +207,39 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
 
         # Handle cases where prefix candidate is passed
         if prefix_candidate_path:
-            booting_prefix_size = self.get_booting_prefix_size(prefix_candidate_path)
-            is_booted_successfully = booting_prefix_size is not None
+            # this returns none when we do not have a prefix
+            # if it is not none, the current checkpoint is no longer needed 
+            progress_prefix_size = self.get_progress_prefix_size(prefix_candidate_path)
+            did_some_progress = progress_prefix_size is not None
             if is_previously_used_prefix:
-                if is_booted_successfully:
+                if did_some_progress:
+                    # the old prefix has had its update processed already
+                    # technically, this cannot happen. An old prefix cannot progress 
+                    # further in the checkpoints, can it?
+                    # self.parent.update_checkpoint()
                     # A previously booting prefix still boots.
                     # Set the booting prefix and prepend remainder to input files
-                    self.save_prefix_input(prefix_candidate_path, booting_prefix_size)
-                    prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=booting_prefix_size)
+                    self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
+                    prepend_to_all(self.base_input_dir, prefix_candidate_path, from_offset=progress_prefix_size)
                 else:
-                    # The input no longer successfully boots the image
+                    # the prefix did not make progress. Still, keep it as we need it to reach the next prefix
                     # Attach the no longer booting prefix to input files and minimize without prefix
+                    self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
                     prepend_to_all(self.base_input_dir, prefix_candidate_path)
             else:
-                if is_booted_successfully:
+                if did_some_progress:
+                    # Update the checkpoint since we are not none
+                    # does not matter if we are the last checkpoint
+                    # in this case, update checkpoint does nothing
+                    self.parent.update_checkpoint()
                     # A brand new booting input was discovered, use it as new input prefix and reset to generic inputs
                     # extract prefix from input, copy over generic base inputs
+                    logger.debug(f"Reached a checkpoint, deleting {self.base_input_dir} and resetting to {self.parent.generic_inputs_dir}")
+                    tmp = os.listdir(self.base_input_dir)
+                    logger.debug(f"Current base input dir content 1: {tmp}")
                     shutil.rmtree(self.base_input_dir)
                     shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir)
-                    self.save_prefix_input(prefix_candidate_path, booting_prefix_size)
+                    self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
                     # No minimization or input corpus adjustment required in this case, return
                     return
         else:
@@ -223,6 +247,7 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
             pass
 
         # Perform minimization. In case an input prefix is used, this is already saved in self.extra_runtime_args
+        logger.debug(f"Moving {self.base_input_dir} to {self.temp_minimization_dir}")
         shutil.move(self.base_input_dir, self.temp_minimization_dir)
         harness_args = self.emulator_args()
 
@@ -230,11 +255,12 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
             run_corpus_minimizer(harness_args, self.temp_minimization_dir, self.base_input_dir, silent=silent, use_aflpp=self.parent.use_aflpp)
             if not os.listdir(self.base_input_dir):
                 self.parent.add_warning_line("Minimization for fuzzing session '{}' had no inputs remaining, copying generic inputs.".format(self.name))
+                logger.debug(f"Minimisation did not find a base dir and is copying over the generic dir")
                 shutil.rmtree(self.base_input_dir, True)
                 shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir)
-        except subprocess.CalledProcessError:
+        except subprocess.CalledProcessError as e:
             self.parent.add_warning_line("Minimization for fuzzing session '{}' failed, copying full inputs.".format(self.name))
-
+            
             # In case minimization does not work out, copy all inputs
             shutil.rmtree(self.base_input_dir, True)
             shutil.copytree(self.temp_minimization_dir, self.base_input_dir)
@@ -291,11 +317,11 @@ def start_fuzzers(self):
                 logger.warning("[TRIAGING STEP 1] ... Output end")
 
                 logger.warning("\n\n[TRIAGING STEP 2] Re-running single emulation run, showing its output...")
-                run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ])
+                run_target(self.config_path, first_file(self.base_input_dir), self.extra_runtime_args + [ "-v" ], get_output=True)
                 logger.warning("[TRIAGING STEP 2] ... Output end\n")
 
                 logger.warning("\n\n[TRIAGING STEP 3] Re-running single emulation run with .cur_input file, showing its output...")
-                run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ])
+                run_target(self.config_path, self.fuzzer_cur_input_path(instance.inst_num), self.extra_runtime_args + [ "-v" ], get_output=True)
                 logger.warning("[TRIAGING STEP 3] ... Output end\n")
 
                 return False

From a4f871919c17ee45e3676807aa893d06961b6fdb Mon Sep 17 00:00:00 2001
From: Simeon Hoffmann <simeon.hoffmann@cispa.de>
Date: Mon, 25 Mar 2024 15:15:41 +0100
Subject: [PATCH 4/6] removed missed debug statements

---
 fuzzware_pipeline/pipeline.py   | 6 ------
 fuzzware_pipeline/run_target.py | 8 +-------
 fuzzware_pipeline/session.py    | 7 -------
 3 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/fuzzware_pipeline/pipeline.py b/fuzzware_pipeline/pipeline.py
index d860f97..f627527 100644
--- a/fuzzware_pipeline/pipeline.py
+++ b/fuzzware_pipeline/pipeline.py
@@ -597,7 +597,6 @@ def add_main_session(self, prefix_input_candidate=None):
         # Before adding the new session, get the possibly previously used prefix path
         is_previously_used_prefix = False
         if self.curr_main_sess_index and self.curr_main_session.prefix_input_path:
-            logger.debug(f"We have a prefix from the previous session: {self.curr_main_session.prefix_input_path}")
             is_previously_used_prefix = True
             prefix_input_candidate = self.curr_main_session.prefix_input_path
 
@@ -613,14 +612,10 @@ def add_main_session(self, prefix_input_candidate=None):
             logger.debug("Copying over {} inputs".format(len(input_path_list)))
 
             new_sess_inputs_dir = self.curr_main_session.base_input_dir
-            logger.debug(f"Creating directory {new_sess_inputs_dir}")
             os.mkdir(new_sess_inputs_dir)
             for path in input_path_list:
-                logger.debug(f"Copying {path} to {new_sess_inputs_dir}")
                 shutil.copy2(path, new_sess_inputs_dir)
             self.curr_main_session.minimize_inputs(prefix_candidate_path=prefix_input_candidate, is_previously_used_prefix=is_previously_used_prefix)
-            tmp = os.listdir(self.curr_main_session.base_input_dir)
-            logger.debug(f"Current base input dir content 4: {tmp}")
             # Try the inputs
             if self.curr_main_session.start_fuzzers():
                 start_success = True
@@ -797,7 +792,6 @@ def handle_queue_forever(self):
                             restart_pending, num_config_updates = False, 0
                             self.curr_main_session.shutdown()
                             self.add_main_session(pending_prefix_candidate)
-                            logger.debug(f"Updated to checkpoint {self.current_checkpoint_name}")
                             pending_prefix_candidate = None
                             time_latest_new_basic_block = None
                         else:
diff --git a/fuzzware_pipeline/run_target.py b/fuzzware_pipeline/run_target.py
index d8ca8b7..538e8de 100644
--- a/fuzzware_pipeline/run_target.py
+++ b/fuzzware_pipeline/run_target.py
@@ -23,10 +23,4 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal
         logger.debug("Full command: {}".format(" ".join(arg_list)))
 
     if get_output:
-        out = subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
-        err = subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stderr
-        logger.debug(f"subprocess stdout: {out}")
-        logger.debug(f"subprocess stderr: {err}")
-        return out
-    #     return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
-    return subprocess.call(arg_list, stdout=stdout, stderr=stderr)
+         return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
diff --git a/fuzzware_pipeline/session.py b/fuzzware_pipeline/session.py
index ed94b0f..975549a 100644
--- a/fuzzware_pipeline/session.py
+++ b/fuzzware_pipeline/session.py
@@ -158,8 +158,6 @@ def get_progress_prefix_size(self, input_path):
         # count all the consumptions
         for evt_id, pc, lr, mode, access_size, access_fuzz_ind, num_consumed_fuzz_bytes, address, _ in parse_mmio_trace(self.temp_mmio_trace_path)[::-1]:
             if mode == "r":
-                logger.debug(f"found a memory access with the following properties: \n \
-                        pc: {pc}, lr: {lr}, access size: {access_size}, access indicator: {access_fuzz_ind}, num consumed bytes {num_consumed_fuzz_bytes}, address: {address}")
                 prefix_size = access_fuzz_ind + num_consumed_fuzz_bytes
                 break
 
@@ -234,9 +232,6 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
                     self.parent.update_checkpoint()
                     # A brand new booting input was discovered, use it as new input prefix and reset to generic inputs
                     # extract prefix from input, copy over generic base inputs
-                    logger.debug(f"Reached a checkpoint, deleting {self.base_input_dir} and resetting to {self.parent.generic_inputs_dir}")
-                    tmp = os.listdir(self.base_input_dir)
-                    logger.debug(f"Current base input dir content 1: {tmp}")
                     shutil.rmtree(self.base_input_dir)
                     shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir)
                     self.save_prefix_input(prefix_candidate_path, progress_prefix_size)
@@ -247,7 +242,6 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
             pass
 
         # Perform minimization. In case an input prefix is used, this is already saved in self.extra_runtime_args
-        logger.debug(f"Moving {self.base_input_dir} to {self.temp_minimization_dir}")
         shutil.move(self.base_input_dir, self.temp_minimization_dir)
         harness_args = self.emulator_args()
 
@@ -255,7 +249,6 @@ def minimize_inputs(self, silent=False, prefix_candidate_path=None, is_previousl
             run_corpus_minimizer(harness_args, self.temp_minimization_dir, self.base_input_dir, silent=silent, use_aflpp=self.parent.use_aflpp)
             if not os.listdir(self.base_input_dir):
                 self.parent.add_warning_line("Minimization for fuzzing session '{}' had no inputs remaining, copying generic inputs.".format(self.name))
-                logger.debug(f"Minimisation did not find a base dir and is copying over the generic dir")
                 shutil.rmtree(self.base_input_dir, True)
                 shutil.copytree(self.parent.generic_inputs_dir, self.base_input_dir)
         except subprocess.CalledProcessError as e:

From e0fbf52cbb3b25462454eece3a96beed22c2c807 Mon Sep 17 00:00:00 2001
From: Simeon Hoffmann <simeon.hoffmann@cispa.de>
Date: Mon, 25 Mar 2024 15:56:56 +0100
Subject: [PATCH 5/6] added wrongly removed statement back

---
 fuzzware_pipeline/run_target.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fuzzware_pipeline/run_target.py b/fuzzware_pipeline/run_target.py
index 538e8de..2825a7b 100644
--- a/fuzzware_pipeline/run_target.py
+++ b/fuzzware_pipeline/run_target.py
@@ -24,3 +24,4 @@ def run_target(config_path, input_path, extra_args, get_output=False, silent=Fal
 
     if get_output:
          return subprocess.run(arg_list, check=False, stdout=subprocess.PIPE).stdout
+    return subprocess.call(arg_list, stdout=stdout, stderr=stderr)

From 0d38717f56c0f8d85864704b6d473b8b5c632d65 Mon Sep 17 00:00:00 2001
From: smnhff <117087063+smnhff@users.noreply.github.com>
Date: Mon, 25 Mar 2024 15:10:26 +0000
Subject: [PATCH 6/6] small bugfix (#2)

---
 fuzzware_pipeline/pipeline.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fuzzware_pipeline/pipeline.py b/fuzzware_pipeline/pipeline.py
index f627527..8e09139 100644
--- a/fuzzware_pipeline/pipeline.py
+++ b/fuzzware_pipeline/pipeline.py
@@ -486,7 +486,6 @@ def is_successfully_booted(self, bbl_set):
 
     # this checks if the currently defined checkpoint is hit 
     def checkpoint_progress(self, bbl_set):
-        tmp = self.current_checkpoint["checkpoint_target"]
         # did we find our checkpoint?
         return self.current_checkpoint and (self.current_checkpoint["checkpoint_target"] in bbl_set) and (
             # And no blacklist addresses found and all whitelist addresses in bbl set