File tree Expand file tree Collapse file tree 1 file changed +7
-7
lines changed Expand file tree Collapse file tree 1 file changed +7
-7
lines changed Original file line number Diff line number Diff line change @@ -4326,13 +4326,13 @@ static __global__ void mul_mat_vec_nc_f16_f32( // nc == non-contiguous
4326
4326
4327
4327
const half * x = (const half *) vx;
4328
4328
4329
- const int row_x = blockDim .y *blockIdx .y + threadIdx .y ;
4330
- const int channel = blockDim .z *blockIdx .z + threadIdx .z ;
4329
+ const int row_x = blockDim .y *blockIdx .y + threadIdx .y ;
4330
+ const int channel = blockDim .z *blockIdx .z + threadIdx .z ;
4331
4331
const int channel_x = channel / channel_x_divisor;
4332
4332
4333
- const int nrows_y = ncols_x;
4333
+ const int nrows_y = ncols_x;
4334
4334
const int nrows_dst = nrows_x;
4335
- const int row_dst = row_x;
4335
+ const int row_dst = row_x;
4336
4336
4337
4337
const int idst = channel*nrows_dst + row_dst;
4338
4338
@@ -4345,13 +4345,13 @@ static __global__ void mul_mat_vec_nc_f16_f32( // nc == non-contiguous
4345
4345
break ;
4346
4346
}
4347
4347
4348
- const int ix = channel_x*channel_stride_x + row_x*row_stride_x + col_x;
4349
- const float xi = __half2float (x[ix]);
4350
-
4351
4348
const int row_y = col_x;
4352
4349
4350
+ const int ix = channel_x*channel_stride_x + row_x*row_stride_x + col_x;
4353
4351
const int iy = channel*nrows_y + row_y;
4354
4352
4353
+ const float xi = __half2float (x[ix]);
4354
+
4355
4355
tmp += xi * y[iy];
4356
4356
}
4357
4357
You can’t perform that action at this time.
0 commit comments