@@ -37,7 +37,6 @@ def _add_make_precompiler(x: torch.Tensor, y: torch.Tensor):
 --- assertExpectedJournal(TestExamples.test_attention_block_pointer)
 from __future__ import annotations
 
-import math
 import torch
 import triton
 import triton.language as tl
@@ -103,7 +102,6 @@ def attention(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 128
     _BLOCK_SIZE_3 = 64
     _attention_kernel[64 * triton.cdiv(1024, _BLOCK_SIZE_1),](q_view, k_view, v_view, out, _BLOCK_SIZE_1, _BLOCK_SIZE_3, num_warps=4, num_stages=3)
@@ -119,7 +117,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 128
     _BLOCK_SIZE_3 = 64
     from helion.runtime.precompile_shim import make_precompiler
@@ -128,7 +125,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
 --- assertExpectedJournal(TestExamples.test_attention_dynamic)
 from __future__ import annotations
 
-import math
 import torch
 import triton
 import triton.language as tl
@@ -198,7 +194,6 @@ def attention(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 32
     _RDIM_SIZE_2 = 64
     _BLOCK_SIZE_3 = 32
@@ -215,7 +210,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 32
     _RDIM_SIZE_2 = 64
     _BLOCK_SIZE_3 = 32
@@ -225,7 +219,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
 --- assertExpectedJournal(TestExamples.test_attention_pointer)
 from __future__ import annotations
 
-import math
 import torch
 import triton
 import triton.language as tl
@@ -291,7 +284,6 @@ def attention(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 64
     _RDIM_SIZE_2 = 64
     _BLOCK_SIZE_3 = 64
@@ -308,7 +300,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 64
     _RDIM_SIZE_2 = 64
     _BLOCK_SIZE_3 = 64
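
For reference, every `sm_scale` line removed in these journals computed the standard softmax scaling factor for scaled-dot-product attention, 1/sqrt(head_dim). A minimal standalone sketch of that expression (illustrative only; the helper name and shapes below are hypothetical, not part of Helion's generated code):

    import math

    import torch

    # The value the deleted host-code lines computed: 1/sqrt(head_dim).
    def sm_scale(head_dim: int) -> float:
        return 1.0 / math.sqrt(head_dim)

    # Attention scores are scaled by this factor before the softmax.
    # Shapes here are illustrative (batch*heads=8, seq=1024, head_dim=64).
    q = torch.randn(8, 1024, 64)
    k = torch.randn(8, 1024, 64)
    scores = (q @ k.transpose(1, 2)) * sm_scale(q.shape[-1])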