
Commit c6703d1

[MISC] Remove unused variables in C++ (#19609)
Signed-off-by: Lu Fang <lufang@fb.com>
1 parent a5e7242 commit c6703d1
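Most of the lines this commit deletes follow one pattern: a local variable that is computed, annotated [[maybe_unused]] to silence -Wunused-variable, and then never read. Below is a minimal, self-contained C++17 sketch of that pattern (illustrative only; num_thread_blocks and the other names are hypothetical, not code from this diff), showing why deleting such a declaration is behavior-preserving when its initializer has no side effects. Note that the two paged_attention launchers additionally drop an assert(head_size % thread_group_size == 0) that referenced the dead variable, so that check disappears along with it.

    // Requires C++17 for [[maybe_unused]].
    #include <cstdio>

    constexpr int kWarpSize = 64;

    // Hypothetical launcher-style helper with a leftover dead local.
    int num_thread_blocks(int num_tokens, int block_size) {
      // Dead code: computed but never read. [[maybe_unused]] only suppresses
      // the -Wunused-variable warning; it does not make the value meaningful.
      [[maybe_unused]] int thread_group_size =
          (kWarpSize / block_size > 1) ? (kWarpSize / block_size) : 1;

      // The result depends only on num_tokens and block_size, so removing the
      // declaration above (the kind of cleanup this commit performs) cannot
      // change observable behavior: the initializer has no side effects and
      // nothing reads the value.
      return (num_tokens + block_size - 1) / block_size;
    }

    int main() {
      std::printf("%d\n", num_thread_blocks(1000, 16));  // prints 63
      return 0;
    }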

File tree

csrc/attention/paged_attention_v1.cu
csrc/attention/paged_attention_v2.cu
csrc/prepare_inputs/advance_step.cu
csrc/quantization/fp8/amd/quant_utils.cuh
csrc/quantization/gptq/q_gemm.cu
csrc/rocm/attention.cu

6 files changed: +2 -39 lines


csrc/attention/paged_attention_v1.cu

Lines changed: 1 addition & 4 deletions
@@ -65,9 +65,6 @@ void paged_attention_v1_launcher(
   int kv_block_stride = key_cache.stride(0);
   int kv_head_stride = key_cache.stride(1);
 
-  [[maybe_unused]] int thread_group_size = MAX(WARP_SIZE / BLOCK_SIZE, 1);
-  assert(head_size % thread_group_size == 0);
-
   // NOTE: alibi_slopes is optional.
   const float* alibi_slopes_ptr =
       alibi_slopes
@@ -193,4 +190,4 @@ void paged_attention_v1(
 #undef WARP_SIZE
 #undef MAX
 #undef MIN
-#undef DIVIDE_ROUND_UP
+#undef DIVIDE_ROUND_UP

csrc/attention/paged_attention_v2.cu

Lines changed: 1 addition & 4 deletions
@@ -66,9 +66,6 @@ void paged_attention_v2_launcher(
   int kv_block_stride = key_cache.stride(0);
   int kv_head_stride = key_cache.stride(1);
 
-  [[maybe_unused]] int thread_group_size = MAX(WARP_SIZE / BLOCK_SIZE, 1);
-  assert(head_size % thread_group_size == 0);
-
   // NOTE: alibi_slopes is optional.
   const float* alibi_slopes_ptr =
       alibi_slopes
@@ -203,4 +200,4 @@ void paged_attention_v2(
 #undef WARP_SIZE
 #undef MAX
 #undef MIN
-#undef DIVIDE_ROUND_UP
+#undef DIVIDE_ROUND_UP

csrc/prepare_inputs/advance_step.cu

Lines changed: 0 additions & 1 deletion
@@ -274,7 +274,6 @@ void advance_step_flashinfer(
   cudaDeviceGetAttribute(&blocks, cudaDevAttrMultiProcessorCount, dev);
   cudaDeviceGetAttribute(&threads, cudaDevAttrMaxThreadsPerBlock, dev);
 
-  [[maybe_unused]] int block_tables_stride = block_tables.stride(0);
   TORCH_CHECK((blocks * threads > num_queries),
               "multi-step: not enough threads to map to num_queries = ",
               num_queries, " block_tables.stride(0) = ", block_tables.stride(0),

csrc/quantization/fp8/amd/quant_utils.cuh

Lines changed: 0 additions & 2 deletions
@@ -446,8 +446,6 @@ scaled_vec_conversion<uint16_t, uint8_t>(const uint8_t& a, float scale) {
 template <>
 __inline__ __device__ uint32_t
 scaled_vec_conversion<uint32_t, uint16_t>(const uint16_t& a, float scale) {
-  [[maybe_unused]] __half2_raw h2r =
-      __hip_cvt_fp8x2_to_halfraw2(a, fp8_type::__default_interpret);
   union {
     __half2_raw h2r;
     uint32_t ui32;

csrc/quantization/gptq/q_gemm.cu

Lines changed: 0 additions & 8 deletions
@@ -206,8 +206,6 @@ __global__ void gemm_half_q_half_gptq_4bit_kernel(
   auto offset_m = blockIdx.y * m_count;
   auto offset_k = blockIdx.z * BLOCK_KN_SIZE;
 
-  [[maybe_unused]] int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
-  [[maybe_unused]] int end_m = min(offset_m + m_count, size_m);
   int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);
 
   int n = offset_n + t * 4;
@@ -344,8 +342,6 @@ __global__ void gemm_half_q_half_gptq_2bit_kernel(
   auto offset_m = blockIdx.y * m_count;
   auto offset_k = blockIdx.z * BLOCK_KN_SIZE;
 
-  [[maybe_unused]] int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
-  [[maybe_unused]] int end_m = min(offset_m + m_count, size_m);
   int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);
 
   int n = offset_n + t * 4;
@@ -465,8 +461,6 @@ __global__ void gemm_half_q_half_gptq_3bit_kernel(
   auto offset_m = blockIdx.y * m_count;
   auto offset_k = blockIdx.z * BLOCK_KN_SIZE;
 
-  [[maybe_unused]] int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
-  [[maybe_unused]] int end_m = min(offset_m + m_count, size_m);
   int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);
 
   int n = offset_n + t * 4;
@@ -593,8 +587,6 @@ __global__ void gemm_half_q_half_gptq_8bit_kernel(
   auto offset_m = blockIdx.y * m_count;
   auto offset_k = blockIdx.z * BLOCK_KN_SIZE;
 
-  [[maybe_unused]] int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
-  [[maybe_unused]] int end_m = min(offset_m + m_count, size_m);
   int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);
 
   int n = offset_n + t * 4;

csrc/rocm/attention.cu

Lines changed: 0 additions & 20 deletions
@@ -136,11 +136,6 @@ __device__ __forceinline__ T from_float(const float& inp) {
 
 template <typename T>
 __device__ __forceinline__ _B16x4 from_floatx4(const floatx4& inp) {
-  [[maybe_unused]] union tmpcvt {
-    uint16_t u;
-    _Float16 f;
-    __hip_bfloat16 b;
-  } t16;
   _B16x4 ret;
   if constexpr (std::is_same<T, _Float16>::value) {
     union h2cvt {
@@ -169,11 +164,6 @@ __device__ __forceinline__ _B16x4 from_floatx4(const floatx4& inp) {
 template <typename T>
 __device__ __forceinline__ _B16x4 addx4(const _B16x4& inp1,
                                         const _B16x4& inp2) {
-  [[maybe_unused]] union tmpcvt {
-    uint16_t u;
-    _Float16 f;
-    __hip_bfloat16 b;
-  } t1, t2, res;
   _B16x4 ret;
   if constexpr (std::is_same<T, _Float16>::value) {
     union h2cvt {
@@ -325,8 +315,6 @@ __launch_bounds__(NUM_THREADS, 5) void paged_attention_ll4mi_QKV_mfma16_kernel(
 
   constexpr int GQA_RATIO4 = DIVIDE_ROUND_UP(GQA_RATIO, 4);
 
-  [[maybe_unused]] __shared__ float shared_qk_max[NWARPS][16 + 1];
-  [[maybe_unused]] __shared__ float shared_exp_sum[NWARPS][16 + 1];
   // shared_logits is used for multiple purposes
   __shared__ _B16x4 shared_logits[NWARPS][4][16][4];
 
@@ -444,8 +432,6 @@ __launch_bounds__(NUM_THREADS, 5) void paged_attention_ll4mi_QKV_mfma16_kernel(
     const cache_t* k_ptr2 = k_ptr + kblock_number * kv_block_stride;
     const int klocal_token_idx =
         TOKENS_PER_WARP * warpid + token_depth * 16 + lane16id;
-    [[maybe_unused]] const int kglobal_token_idx =
-        partition_start_token_idx + klocal_token_idx;
     const int kphysical_block_offset = klocal_token_idx % BLOCK_SIZE;
     const cache_t* k_ptr3 = k_ptr2 + kphysical_block_offset * KX;
 
@@ -1309,9 +1295,7 @@ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_reduce_kernel(
 
   const int context_len = context_lens[seq_idx];
   const int num_partitions = DIVIDE_ROUND_UP(context_len, PARTITION_SIZE);
-  [[maybe_unused]] constexpr int NUM_WARPS = NUM_THREADS / WARP_SIZE;
   const auto warpid = threadIdx.x / WARP_SIZE;
-  [[maybe_unused]] const auto laneid = threadIdx.x % WARP_SIZE;
 
   __shared__ float shared_global_exp_sum;
   // max num partitions supported is warp_size * NPAR_LOOPS
@@ -2080,9 +2064,7 @@ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_reduce_kernel(
 
   const int context_len = context_lens[seq_idx];
   const int num_partitions = DIVIDE_ROUND_UP(context_len, PARTITION_SIZE);
-  [[maybe_unused]] constexpr int NUM_WARPS = NUM_THREADS / WARP_SIZE;
   const int warpid = threadIdx.x / WARP_SIZE;
-  [[maybe_unused]] const int laneid = threadIdx.x % WARP_SIZE;
 
   __shared__ float shared_global_exp_sum;
   // max num partitions supported is warp_size * NPAR_LOOPS
@@ -2816,9 +2798,7 @@ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_reduce_kernel(
 
   const int context_len = context_lens[seq_idx];
   const int num_partitions = DIVIDE_ROUND_UP(context_len, PARTITION_SIZE);
-  [[maybe_unused]] constexpr int NUM_WARPS = NUM_THREADS / WARP_SIZE;
   const int warpid = threadIdx.x / WARP_SIZE;
-  [[maybe_unused]] const int laneid = threadIdx.x % WARP_SIZE;
 
   __shared__ float shared_global_exp_sum;
   // max num partitions supported is warp_size * NPAR_LOOPS

0 commit comments
