From e8f19009bbc3c3ba9bea2d97ade10f83682593e2 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Mon, 9 Jun 2025 17:59:02 -0400 Subject: [PATCH 01/14] enable CI with TRITON_INTERPRET=1; format trace.py using pre-commit --- .github/workflows/python-app.yml | 12 ++++++++---- triton_viz/core/trace.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 3bc0f3e1..6adabd6a 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -5,11 +5,13 @@ name: Python application on: push: - branches-ignore: - - '**' + branches: + - main + - keren/v2.0 pull_request: - branches-ignore: - - '**' + branches: + - main + - keren/v2.0 permissions: contents: read @@ -54,6 +56,8 @@ jobs: pip install -e . - name: Test with pytest + env: | + TRITON_INTERPRET=1 run: | cd triton_viz python -m pytest tests diff --git a/triton_viz/core/trace.py b/triton_viz/core/trace.py index 44ff8d5e..ef4f2ed1 100644 --- a/triton_viz/core/trace.py +++ b/triton_viz/core/trace.py @@ -34,7 +34,11 @@ def add_client(self, new_client: Union[Client, str]) -> None: new_client_instance = self._normalize_client(new_client) self.client_manager.add_clients([new_client_instance]) - def __init__(self, kernel: Union[JITFunction, InterpretedFunction], client: Union[str, Client]) -> None: + def __init__( + self, + kernel: Union[JITFunction, InterpretedFunction], + client: Union[str, Client], + ) -> None: self.fn = kernel if isinstance(kernel, InterpretedFunction): self.interpreter_fn = kernel @@ -91,7 +95,9 @@ def decorator(kernel) -> Trace: trace.add_client(clients) return trace - raise TypeError(f"Expected JITFunction, InterpretedFunction or Trace, got {type(kernel)}") + raise TypeError( + f"Expected JITFunction, InterpretedFunction or Trace, got {type(kernel)}" + ) return decorator From 4518dbce4a8ce7714a86491b104386cfebbde97b Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Mon, 9 Jun 2025 18:03:23 -0400 Subject: [PATCH 02/14] fix yaml format --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 6adabd6a..0edc70d0 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -57,7 +57,7 @@ jobs: - name: Test with pytest env: | - TRITON_INTERPRET=1 + TRITON_INTERPRET: "1" run: | cd triton_viz python -m pytest tests From 515da8dcf839ba3235dece2f52bf903d46e0b3a9 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Mon, 9 Jun 2025 18:06:29 -0400 Subject: [PATCH 03/14] fix yaml 2.0 --- .github/workflows/python-app.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 0edc70d0..75c90342 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -23,6 +23,8 @@ concurrency: jobs: build: runs-on: ubuntu-latest + env: | + TRITON_INTERPRET: "1" steps: - uses: actions/checkout@v3 @@ -56,8 +58,6 @@ jobs: pip install -e . 
    - name: Test with pytest
-      env: |
-        TRITON_INTERPRET: "1"
       run: |
         cd triton_viz
         python -m pytest tests

From 8c560deeeebc70a53a050365c58f32bd5a2fdd17 Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Mon, 9 Jun 2025 18:07:43 -0400
Subject: [PATCH 04/14] remove extra '|'

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 75c90342..717dce85 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -23,7 +23,7 @@ concurrency:
 jobs:
   build:
     runs-on: ubuntu-latest
-    env: |
+    env:
       TRITON_INTERPRET: "1"

     steps:

From 76d3188d9ad4322e8a2e119301499d785f8d3209 Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Mon, 9 Jun 2025 19:30:17 -0400
Subject: [PATCH 05/14] remove GPU dependencies in unittests

---
 tests/test_config.py          | 3 ++-
 tests/test_print_traceback.py | 2 +-
 tests/test_wrapper.py         | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/test_config.py b/tests/test_config.py
index c8926448..8ce9d9c9 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,4 +1,5 @@
-import pytest
+import pytest, os
+os.environ["TRITON_SANITIZER_BACKEND"] = "off"

 import triton_viz.core.config as cfg

diff --git a/tests/test_print_traceback.py b/tests/test_print_traceback.py
index 226d54e3..e09f999b 100644
--- a/tests/test_print_traceback.py
+++ b/tests/test_print_traceback.py
@@ -27,7 +27,7 @@ def kernel_A(ptr, n):

 def test_print_nested_functions():
-    x = torch.arange(4, device="cuda", dtype=torch.float32)
+    x = torch.arange(4, dtype=torch.float32)
     print("Input:", x)

     # We'll launch a grid bigger than x.numel() to force an out-of-bounds error

diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py
index 925c521f..54028c4a 100644
--- a/tests/test_wrapper.py
+++ b/tests/test_wrapper.py
@@ -59,6 +59,7 @@ def _decorator(fn):
     env = os.environ.copy()
     env["PYTHONPATH"] = str(tmp_path) + os.pathsep + env.get("PYTHONPATH", "")
     env["TRITON_SANITIZER_BACKEND"] = "symexec"
+    env["TRITON_INTERPRET"] = "1"

     # run the dummy program using triton-sanitizer
     cmd = ["triton-sanitizer", str(tmp_path / "dummy_program.py")]

From 3ddc8bc1703539b9269cfdeace9cb194305c75ba Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Mon, 9 Jun 2025 19:36:52 -0400
Subject: [PATCH 06/14] remove GPU dependencies in test_autotune_add.py

---
 tests/test_autotune_add.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/test_autotune_add.py b/tests/test_autotune_add.py
index e0ece54e..480fd43a 100644
--- a/tests/test_autotune_add.py
+++ b/tests/test_autotune_add.py
@@ -9,6 +9,17 @@

 cfg.sanitizer_backend = "symexec"

+# skip benchmarking in CPU-only environments,
+# as do_bench relies on GPUs
+if not torch.backends.cuda.is_built():
+    import triton.testing
+    triton.testing.do_bench = lambda *_, **__: [0.0, 0.0, 0.0]
+    from triton.runtime import autotuner
+    orig_init = autotuner.Autotuner.__init__
+    def patched_init(self, *args, **kwargs):
+        orig_init(self, *args, **kwargs)
+        self.do_bench = lambda *_, **__: [0.0, 0.0, 0.0]
+    autotuner.Autotuner.__init__ = patched_init

 @triton.autotune(
     configs=[

From 6af5d721f7ef9f2af5fe1c89a3bf192b1138b7d1 Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Mon, 9 Jun 2025 19:38:40 -0400
Subject: [PATCH 07/14] remove cuda tensors

---
 tests/test_autotune_add.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_autotune_add.py b/tests/test_autotune_add.py
index 480fd43a..6ae010b8 100644
--- a/tests/test_autotune_add.py
+++ b/tests/test_autotune_add.py
@@ -50,8 +50,8 @@ def test_autotune_add_inrange():
     This test uses n_elements = 128, matching the size of the input tensors.
     It should NOT cause any out-of-bound access.
     """
-    x = torch.randn(128, device="cuda")
-    y = torch.randn(128, device="cuda")
+    x = torch.randn(128)
+    y = torch.randn(128)
     out = torch.empty_like(x)

     # The kernel launch uses n_elements=128, aligned with the tensor size.
@@ -66,8 +66,8 @@ def test_autotune_add_out_of_bound():
     This test deliberately sets n_elements = 256, exceeding the actual buffer size (128).
     It will likely cause out-of-bound reads/writes, which may trigger errors or warnings.
     """
-    x = torch.randn(128, device="cuda")
-    y = torch.randn(128, device="cuda")
+    x = torch.randn(128)
+    y = torch.randn(128)
     out = torch.empty_like(x)

     # The kernel launch uses n_elements=256, exceeding the valid tensor size.

From 6dbc7eb27c4eb7e7fefd7adc3b2a693b0c7d26f6 Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Mon, 9 Jun 2025 20:12:30 -0400
Subject: [PATCH 08/14] use newest triton

---
 .github/workflows/python-app.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 717dce85..c06084e5 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -50,7 +50,9 @@ jobs:

     - name: Clone Triton and Install
       run: |
-        pip install triton==3.1.0
+        git clone https://github.com/openai/triton.git
+        pip install -r python/requirements.txt
+        pip install -e .

     - name: Install Triton-Viz
       run: |

From 636066b32c12cb4b94009505c3f63e71641011bc Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Mon, 9 Jun 2025 20:15:08 -0400
Subject: [PATCH 09/14] fix triton installation

---
 .github/workflows/python-app.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index c06084e5..e3cb9f9a 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -51,6 +51,7 @@ jobs:
     - name: Clone Triton and Install
       run: |
         git clone https://github.com/openai/triton.git
+        cd triton
         pip install -r python/requirements.txt
         pip install -e .

From 1e3bd5628ff6eff0a43a2b364b0bffa9ebe06b92 Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Wed, 11 Jun 2025 13:15:49 -0400
Subject: [PATCH 10/14] skip autotuner test on CPU for now

---
 .github/workflows/python-app.yml | 7 ++-----
 tests/test_autotune_add.py       | 9 +++++++++
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index e3cb9f9a..55a18749 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -48,12 +48,9 @@ jobs:
         pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
         pip uninstall pytorch-triton -y

-    - name: Clone Triton and Install
+    - name: Install Triton
       run: |
-        git clone https://github.com/openai/triton.git
-        cd triton
-        pip install -r python/requirements.txt
-        pip install -e .
+ pip install triton==3.1.0 - name: Install Triton-Viz run: | diff --git a/tests/test_autotune_add.py b/tests/test_autotune_add.py index 6ae010b8..c6c5411a 100644 --- a/tests/test_autotune_add.py +++ b/tests/test_autotune_add.py @@ -1,3 +1,4 @@ +import pytest import torch import triton import triton.language as tl @@ -45,6 +46,10 @@ def add_kernel_no_mask(x_ptr, y_ptr, out_ptr, n_elements, BLOCK_SIZE: tl.constex tl.store(out_ptr + offsets, x_val + y_val) +@pytest.mark.skipif( + not torch.backends.cuda.is_built(), + reason="This test requires a CUDA-enabled environment.", +) def test_autotune_add_inrange(): """ This test uses n_elements = 128, matching the size of the input tensors. @@ -61,6 +66,10 @@ def test_autotune_add_inrange(): print("test_autotune_add_inrange() passed: No out-of-bound access.") +@pytest.mark.skipif( + not torch.backends.cuda.is_built(), + reason="This test requires a CUDA-enabled environment.", +) def test_autotune_add_out_of_bound(): """ This test deliberately sets n_elements = 256, exceeding the actual buffer size (128). From 6cfac320abe4fc89c9754f5fffb8a2251f8803f8 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Wed, 11 Jun 2025 13:45:29 -0400 Subject: [PATCH 11/14] delete patched_init for now --- tests/test_autotune_add.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/test_autotune_add.py b/tests/test_autotune_add.py index c6c5411a..16a24027 100644 --- a/tests/test_autotune_add.py +++ b/tests/test_autotune_add.py @@ -10,18 +10,6 @@ cfg.sanitizer_backend = "symexec" -# skip benchmarking in CPU-only environments, -# as do_bench relies on GPUs -if not torch.backends.cuda.is_built(): - import triton.testing - triton.testing.do_bench = lambda *_, **__: [0.0, 0.0, 0.0] - from triton.runtime import autotuner - orig_init = autotuner.Autotuner.__init__ - def patched_init(self, *args, **kwargs): - orig_init(self, *args, **kwargs) - self.do_bench = lambda *_, **__: [0.0, 0.0, 0.0] - autotuner.Autotuner.__init__ = patched_init - @triton.autotune( configs=[ triton.Config({"BLOCK_SIZE": 32}, num_warps=1), From 7e4253d88d2dce601ab38ea6f8c8bd58362201ac Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Wed, 11 Jun 2025 14:10:47 -0400 Subject: [PATCH 12/14] skip autotune when gpu is missing --- tests/test_autotune_add.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/test_autotune_add.py b/tests/test_autotune_add.py index 16a24027..ccde6a25 100644 --- a/tests/test_autotune_add.py +++ b/tests/test_autotune_add.py @@ -8,6 +8,9 @@ from triton_viz import config as cfg +if not torch.backends.cuda.is_built(): + pytest.skip("This test requires a CUDA-enabled environment.") + cfg.sanitizer_backend = "symexec" @triton.autotune( @@ -34,10 +37,6 @@ def add_kernel_no_mask(x_ptr, y_ptr, out_ptr, n_elements, BLOCK_SIZE: tl.constex tl.store(out_ptr + offsets, x_val + y_val) -@pytest.mark.skipif( - not torch.backends.cuda.is_built(), - reason="This test requires a CUDA-enabled environment.", -) def test_autotune_add_inrange(): """ This test uses n_elements = 128, matching the size of the input tensors. @@ -54,10 +53,6 @@ def test_autotune_add_inrange(): print("test_autotune_add_inrange() passed: No out-of-bound access.") -@pytest.mark.skipif( - not torch.backends.cuda.is_built(), - reason="This test requires a CUDA-enabled environment.", -) def test_autotune_add_out_of_bound(): """ This test deliberately sets n_elements = 256, exceeding the actual buffer size (128). 
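A note on the skip pattern above: calling `pytest.skip(...)` at module scope without `allow_module_level=True` makes pytest abort collection with a usage error, which is what PATCH 13 below addresses. A minimal sketch of an equivalent, marker-based way to gate the whole module, assuming only `pytest` and `torch`; `pytestmark` is pytest's documented module-level marker hook, not a name taken from this repo:

```python
import pytest
import torch

# Apply skipif to every test collected from this module; unlike a bare
# module-level pytest.skip(), this never raises at import time.
pytestmark = pytest.mark.skipif(
    not torch.cuda.is_available(),
    reason="This test requires a CUDA-enabled environment.",
)
```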
From ee21925b047c0fb6a3d0596ad20acf30932513f6 Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Wed, 11 Jun 2025 14:12:56 -0400
Subject: [PATCH 13/14] allow pytest skipping at module level

---
 tests/test_autotune_add.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_autotune_add.py b/tests/test_autotune_add.py
index ccde6a25..87997fb7 100644
--- a/tests/test_autotune_add.py
+++ b/tests/test_autotune_add.py
@@ -9,7 +9,7 @@

 if not torch.backends.cuda.is_built():
-    pytest.skip("This test requires a CUDA-enabled environment.")
+    pytest.skip("This test requires a CUDA-enabled environment.", allow_module_level=True)

 cfg.sanitizer_backend = "symexec"

From 5d04beb2fa82aaa1a3b631ef5f3ff99fa4daf7d5 Mon Sep 17 00:00:00 2001
From: Hao Wu
Date: Wed, 11 Jun 2025 14:25:51 -0400
Subject: [PATCH 14/14] use try/except to skip autotuner test

---
 tests/test_autotune_add.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_autotune_add.py b/tests/test_autotune_add.py
index 87997fb7..5c1cf79c 100644
--- a/tests/test_autotune_add.py
+++ b/tests/test_autotune_add.py
@@ -8,7 +8,9 @@

 from triton_viz import config as cfg

-if not torch.backends.cuda.is_built():
+try:
+    torch.cuda.current_device()
+except Exception:
     pytest.skip("This test requires a CUDA-enabled environment.", allow_module_level=True)

 cfg.sanitizer_backend = "symexec"
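Why the series lands on a try/except probe: `torch.backends.cuda.is_built()` only reports whether the installed torch wheel was compiled with CUDA support, not whether a driver and device are actually reachable, so a CUDA-built wheel on a GPU-less CI runner still passes the `is_built()` check. A minimal sketch combining both checks, assuming only `pytest` and `torch`; the `_cuda_usable` helper is a hypothetical name, not something defined in this series:

```python
import pytest
import torch

def _cuda_usable() -> bool:
    """True only if torch was built with CUDA and a device is reachable."""
    if not torch.backends.cuda.is_built():
        return False  # CPU-only wheel
    try:
        torch.cuda.current_device()  # raises when no driver/GPU is visible
        return True
    except Exception:
        return False

if not _cuda_usable():
    pytest.skip("This test requires a CUDA-enabled environment.", allow_module_level=True)
```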