add core bpf mode with new diff fn

buffalojoec · buffalojoec · commit 59403ab1b39a · 2024-12-06T08:45:49.000Z
diff --git a/commands.md b/commands.md
@@ -231,6 +231,7 @@ $ solana-test-suite run-tests [OPTIONS]
 * `-c, --chunk-size INTEGER`: Number of test results per file  [default: 10000]
 * `-v, --verbose`: Verbose output: log failed test cases
 * `-c, --consensus-mode`: Only fail on consensus failures. One such effect is to normalize error codes when comparing results
+* `-cb, --core-bpf-mode`: Deliberately skip known mismatches between BPF programs and builtins, only failing on genuine mismatches. For example, builtin programs may throw errors on readonly account state violations sooner than BPF programs, compute unit usage will be different, etc. This feature is primarily used to test a BPF program against a builtin.
 * `-f, --failures-only`: Only log failed test cases
 * `-sf, --save-failures`: Saves failed test cases to results directory
 * `-ss, --save-successes`: Saves successful test cases to results directory
diff --git a/src/test_suite/instr/diff_utils.py b/src/test_suite/instr/diff_utils.py
@@ -1,6 +1,24 @@
+from enum import Enum
 import test_suite.invoke_pb2 as invoke_pb
 
 
+class DiffMode(Enum):
+    """Selects how two InstrEffects messages are compared for a test result."""
+
+    STANDARD = 0  # compare as-is, nothing normalized
+    CONSENSUS = 1  # compare via consensus_instr_diff_effects
+    CORE_BPF = 2  # compare via core_bpf_instr_diff_effects (BPF program vs. builtin)
+
+    def apply_diff(self, a: invoke_pb.InstrEffects, b: invoke_pb.InstrEffects):
+        """Return True when `a` and `b` are considered equal under this mode."""
+        if self is DiffMode.CONSENSUS:
+            return consensus_instr_diff_effects(a, b)
+        if self is DiffMode.CORE_BPF:
+            return core_bpf_instr_diff_effects(a, b)
+        # STANDARD: must still yield a bool, the caller does `all_passed &= ...`.
+        return a == b
+
+
 def consensus_instr_diff_effects(a: invoke_pb.InstrEffects, b: invoke_pb.InstrEffects):
     a_san = invoke_pb.InstrEffects()
     a_san.CopyFrom(a)
@@ -17,3 +35,29 @@ def consensus_instr_diff_effects(a: invoke_pb.InstrEffects, b: invoke_pb.InstrEf
     b_san.cu_avail = 0
 
     return a_san == b_san
+
+
+def core_bpf_instr_diff_effects(a: invoke_pb.InstrEffects, b: invoke_pb.InstrEffects):
+    """Compare two InstrEffects in Core BPF mode (BPF program vs. builtin).
+
+    Works on copies, so neither `a` nor `b` is modified. Returns True when the
+    normalized copies are equal.
+    """
+    normalized = []
+    for effects in (a, b):
+        scratch = invoke_pb.InstrEffects()
+        scratch.CopyFrom(effects)
+
+        # An errored instruction (non-zero result) contributes no modified accounts.
+        if scratch.result != 0:
+            scratch.ClearField("modified_accounts")
+
+        # Result code, custom error and compute units (cu_avail) are not compared.
+        scratch.result = 0
+        scratch.custom_err = 0
+        scratch.cu_avail = 0
+        normalized.append(scratch)
+
+    # Everything still set on the copies must match exactly.
+    lhs, rhs = normalized
+    return lhs == rhs
diff --git a/src/test_suite/multiprocessing_utils.py b/src/test_suite/multiprocessing_utils.py
@@ -314,11 +314,8 @@ def build_test_results(
             effects = harness_ctx.effects_type()
             effects.ParseFromString(result)
 
-            if globals.consensus_mode:
-                harness_ctx.diff_effect_fn = harness_ctx.consensus_diff_effect_fn
-
-            # Note: diff_effect_fn may modify effects in-place
-            all_passed &= harness_ctx.diff_effect_fn(ref_effects, effects)
+            # Note: apply_diff may modify effects in-place
+            all_passed &= globals.diff_mode.apply_diff(ref_effects, effects)
 
             harness_ctx.effects_human_encode_fn(effects)
             outputs[target] = text_format.MessageToString(effects)
diff --git a/src/test_suite/test_suite.py b/src/test_suite/test_suite.py
@@ -17,6 +17,7 @@
     extract_context_from_fixture,
     write_fixture_to_disk,
 )
+from test_suite.instr.diff_utils import DiffMode
 from test_suite.log_utils import log_results
 from test_suite.multiprocessing_utils import (
     decode_single_test_case,
@@ -355,6 +356,14 @@ def run_tests(
         "-c",
         help="Only fail on consensus failures. One such effect is to normalize error codes when comparing results",
     ),
+    core_bpf_mode: bool = typer.Option(
+        False,
+        "--core-bpf-mode",
+        "-cb",
+        help="Deliberately skip known mismatches between BPF programs and builtins, only failing on genuine mismatches. \
+For example, builtin programs may throw errors on readonly account state violations sooner than BPF programs, \
+compute unit usage will be different, etc. This feature is primarily used to test a BPF program against a builtin.",
+    ),
     failures_only: bool = typer.Option(
         False,
         "--failures-only",
@@ -388,8 +397,19 @@ def run_tests(
     globals.reference_shared_library = reference_shared_library
     globals.default_harness_ctx = HARNESS_MAP[default_harness_ctx]
 
-    # Set diff mode to consensus if specified
-    globals.consensus_mode = consensus_mode
+    # Select the diff mode; the two mode flags are mutually exclusive.
+    if consensus_mode and core_bpf_mode:
+        typer.echo(
+            "Error: --consensus-mode and --core-bpf-mode cannot be used together.",
+            err=True,
+        )
+        raise typer.Exit(code=1)
+    if consensus_mode:
+        globals.diff_mode = DiffMode.CONSENSUS
+    elif core_bpf_mode:
+        globals.diff_mode = DiffMode.CORE_BPF
+    else:
+        globals.diff_mode = DiffMode.STANDARD
 
     # Create the output directory, if necessary
     if globals.output_dir.exists():
@@ -703,6 +723,7 @@ def debug_mismatches(
         log_chunk_size=10000,
         verbose=True,
         consensus_mode=False,
+        core_bpf_mode=False,
         failures_only=False,
         save_failures=True,
         save_successes=True,