15 changes: 0 additions & 15 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -454,7 +454,6 @@ vTensor::vTensor(
sizes_uniform_(),
strides_uniform_(),
numel_uniform_(),
axis_map_uniform_(),
logical_limits_uniform_(),
// Construct Tensor storage
storage_(
@@ -501,7 +500,6 @@ vTensor::vTensor(
sizes_uniform_(),
strides_uniform_(),
numel_uniform_(),
axis_map_uniform_(),
logical_limits_uniform_(),
// Construct Tensor storage
storage_(context, image) {
@@ -527,7 +525,6 @@ vTensor::vTensor(vTensor& other)
sizes_uniform_(),
strides_uniform_(),
numel_uniform_(),
axis_map_uniform_(),
logical_limits_uniform_(),
// Copy Tensor storage
storage_(other.storage_) {}
@@ -553,7 +550,6 @@ vTensor::vTensor(
sizes_uniform_(),
strides_uniform_(),
numel_uniform_(),
axis_map_uniform_(),
logical_limits_uniform_(),
// Copy Tensor storage
storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
@@ -630,14 +626,6 @@ const vkapi::BufferBindInfo vTensor::strides_ubo() {
return vkapi::BufferBindInfo(strides_uniform_.buffer());
}

const vkapi::BufferBindInfo vTensor::axis_map_ubo() {
if (!axis_map_uniform_.buffer()) {
axis_map_uniform_ =
ParamsBuffer(storage_.context_, utils::make_ivec4(axis_map_));
}
return vkapi::BufferBindInfo(axis_map_uniform_.buffer());
}

const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
if (!logical_limits_uniform_.buffer()) {
logical_limits_uniform_ = ParamsBuffer(storage_.context_, logical_limits_);
@@ -710,9 +698,6 @@ void vTensor::update_metadata() {
if (numel_uniform_.buffer()) {
numel_uniform_.update(numel_);
}
if (axis_map_uniform_.buffer()) {
axis_map_uniform_.update(utils::make_ivec4(axis_map_));
}
if (logical_limits_uniform_.buffer()) {
logical_limits_uniform_.update(logical_limits_);
}
20 changes: 13 additions & 7 deletions backends/vulkan/runtime/api/containers/Tensor.h
@@ -308,7 +308,6 @@ class vTensor final {
ParamsBuffer sizes_uniform_;
ParamsBuffer strides_uniform_;
ParamsBuffer numel_uniform_;
ParamsBuffer axis_map_uniform_;
ParamsBuffer logical_limits_uniform_;

vTensorStorage storage_;
@@ -430,6 +429,19 @@ class vTensor final {
return axis_map_;
}

/*
* Returns the values of the axis map and the packed dimension packed into a
* single int32_t, so that it can be used as a specialization constant in a
* compute shader. This allows the shader compiler to constant-fold the axis
* map at pipeline-creation time, when the SPIR-V is compiled for the target
* GPU. Each element of the axis map and the value of the packed dimension
* occupy 4 bits of the packed int32_t.
*/
inline int32_t hashed_layout() const {
return axis_map_.at(0) + (axis_map_.at(1) << 4) + (axis_map_.at(2) << 8) +
(axis_map_.at(3) << 12) + (packed_dim_ << 16);
}

/*
* Return true if the tensor's axis map is {0, 1, 2, concat_dim}. This means
* that the width dim is mapped to the width axis of the texture, the height
@@ -463,12 +475,6 @@
*/
const vkapi::BufferBindInfo strides_ubo();

/*
* Returns a GPU buffer containing the texture axis mapping for each dimension
* of the tensor, in WHCN dimension order.
*/
const vkapi::BufferBindInfo axis_map_ubo();

/*
* Returns a GPU buffer containing the logical limits of the tensor. See the
* comments for logical_limits() for more context.
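To make the 4-bit packing concrete, the following standalone sketch (not part of the diff) reproduces the arithmetic in vTensor::hashed_layout() for one illustrative layout; the example axis map {0, 1, 2, 2} and packed dim 2 are assumed values, chosen so that every field fits in its 4-bit nibble.

#include <array>
#include <cassert>
#include <cstdint>

// Mirrors the arithmetic in vTensor::hashed_layout(): each WHCN axis-map
// entry occupies one 4-bit nibble, and the packed dim sits in the nibble
// above them. All fields are assumed non-negative and < 16, so the additions
// never carry between nibbles.
int32_t pack_layout(const std::array<int32_t, 4>& axis_map, int32_t packed_dim) {
  return axis_map[0] + (axis_map[1] << 4) + (axis_map[2] << 8) +
      (axis_map[3] << 12) + (packed_dim << 16);
}

int main() {
  // Illustrative layout: standard axis map {0, 1, 2, concat_dim} with
  // concat_dim = 2, and a channels-packed dim (2).
  // 0x0 + 0x10 + 0x200 + 0x2000 + 0x20000 = 0x22210
  assert(pack_layout({0, 1, 2, 2}, 2) == 0x22210);
  return 0;
}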
8 changes: 4 additions & 4 deletions backends/vulkan/runtime/graph/ComputeGraph.h
@@ -318,6 +318,10 @@ class ComputeGraph final {
return values_.at(idx).toConstTensor().estimate_memory_layout();
}

inline int32_t hashed_layout_of(const ValueRef idx) const {
return values_.at(idx).toConstTensor().hashed_layout();
}

inline int32_t packed_dim_of(const ValueRef idx) const {
return values_.at(idx).toConstTensor().packed_dim();
}
@@ -338,10 +342,6 @@
return values_.at(idx).toTensor().numel_ubo();
}

inline vkapi::BufferBindInfo axis_map_ubo(const ValueRef idx) {
return values_.at(idx).toTensor().axis_map_ubo();
}

inline bool has_standard_axis_map(const ValueRef idx) {
return values_.at(idx).toTensor().has_standard_axis_map();
}
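The ComputeGraph change above swaps the axis-map UBO accessor for hashed_layout_of(). As a rough, hypothetical sketch of what this means for an op implementation (the container and function names below are assumptions, not the actual ExecuTorch dispatch-node API): the per-tensor axis map no longer occupies a slot in the shader's parameter UBOs and instead rides along with the packed dim as a single specialization constant.

#include <cstdint>
#include <vector>

// Hypothetical stand-ins for the lists an op assembles before a dispatch;
// the real ComputeGraph / dispatch-node API is not reproduced here.
using UboHandle = int;

struct DispatchArgsSketch {
  std::vector<UboHandle> param_ubos;    // bound as uniform buffers at runtime
  std::vector<int32_t> spec_constants;  // baked in when the pipeline is built
};

// Before this change: the axis map was one more ivec4 UBO in the param list.
void bind_layout_via_ubo(DispatchArgsSketch& args, UboHandle axis_map_ubo) {
  args.param_ubos.push_back(axis_map_ubo);
}

// After this change: axis map + packed dim travel as one hashed int32, which
// the shader compiler can fold into the pipeline at creation time.
void bind_layout_via_spec_const(DispatchArgsSketch& args, int32_t hashed_layout) {
  args.spec_constants.push_back(hashed_layout);
}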
28 changes: 18 additions & 10 deletions backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl
@@ -16,31 +16,39 @@ $if MAT2_IS_TRANSPOSED:
$if HAS_BIAS:
#define HAS_BIAS

#include "indexing_utils.h"

${layout_declare_tensor(B, "w", "out_tensor", DTYPE, "texture3d")}
${layout_declare_tensor(B, "r", "mat1_tensor", DTYPE, "texture3d")}
${layout_declare_tensor(B, "r", "mat2_tensor", DTYPE, "texture3d")}
$if HAS_BIAS:
${layout_declare_tensor(B, "r", "bias_tensor", DTYPE, "texture3d")}
${layout_declare_ubo(B, "ivec4", "out_sizes")}
${layout_declare_ubo(B, "ivec3", "out_limits")}
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
${layout_declare_ubo(B, "ivec4", "mat1_sizes")}
${layout_declare_ubo(B, "ivec4", "mat1_axis_map")}
${layout_declare_ubo(B, "ivec4", "mat2_sizes")}
${layout_declare_ubo(B, "ivec4", "mat2_axis_map")}
$if HAS_BIAS:
${layout_declare_ubo(B, "ivec4", "bias_sizes")}
${layout_declare_ubo(B, "ivec4", "bias_axis_map")}
${layout_declare_ubo(B, "float", "alpha", "float", "beta")}

#include "indexing_utils.h"

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int out_packed_dim = C_DIM;
layout(constant_id = 4) const int mat1_packed_dim = W_DIM;
layout(constant_id = 5) const int mat2_packed_dim = H_DIM;
layout(constant_id = 6) const int bias_packed_dim = W_DIM;
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
const lowp int out_packed_dim = unhash_packed_dim(out_layout);

${layout_declare_spec_const(C, "int", "mat1_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 mat1_axis_map = unhash_axis_map(mat1_layout);
const lowp int mat1_packed_dim = unhash_packed_dim(mat1_layout);

${layout_declare_spec_const(C, "int", "mat2_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 mat2_axis_map = unhash_axis_map(mat2_layout);
const lowp int mat2_packed_dim = unhash_packed_dim(mat2_layout);

$if HAS_BIAS:
${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout);
const lowp int bias_packed_dim = unhash_packed_dim(bias_layout);

#ifdef HAS_BIAS
vec4 get_bias_texel_W_packed(ivec3 logical_pos) {
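The shader above decodes each *_layout specialization constant with unhash_axis_map and unhash_packed_dim from indexing_utils.h, whose definitions are not included in this diff. Assuming they simply invert the packing done by vTensor::hashed_layout(), the C++ mirror below shows the expected field extraction (the struct and function names are illustrative, not the GLSL helpers themselves).

#include <cstdint>

// Assumed host-side mirror of the GLSL unhash helpers: pull each 4-bit field
// back out of the hashed layout produced by vTensor::hashed_layout().
struct UnhashedLayout {
  int32_t axis_map[4];  // texture axis for each tensor dim, in WHCN order
  int32_t packed_dim;   // which tensor dim is packed along texel components
};

UnhashedLayout unhash_layout(int32_t hashed) {
  UnhashedLayout out;
  out.axis_map[0] = hashed & 0xf;          // width dim -> texture axis
  out.axis_map[1] = (hashed >> 4) & 0xf;   // height dim -> texture axis
  out.axis_map[2] = (hashed >> 8) & 0xf;   // channels dim -> texture axis
  out.axis_map[3] = (hashed >> 12) & 0xf;  // batch concatenation axis
  out.packed_dim = (hashed >> 16) & 0xf;
  return out;
}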
22 changes: 15 additions & 7 deletions backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl
@@ -19,27 +19,35 @@ $if BATCH_MODE:
$if HAS_BIAS:
#define HAS_BIAS

#include "indexing_utils.h"

${layout_declare_tensor(B, "w", "out_tensor", DTYPE, "texture3d")}
${layout_declare_tensor(B, "r", "mat1_tensor", DTYPE, "texture3d")}
${layout_declare_tensor(B, "r", "mat2_tensor", DTYPE, "texture3d")}
$if HAS_BIAS:
${layout_declare_tensor(B, "r", "bias_tensor", DTYPE, "texture3d")}
${layout_declare_ubo(B, "ivec4", "out_sizes")}
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
${layout_declare_ubo(B, "ivec4", "mat1_sizes")}
${layout_declare_ubo(B, "ivec4", "mat1_axis_map")}
${layout_declare_ubo(B, "ivec4", "mat2_sizes")}
${layout_declare_ubo(B, "ivec4", "mat2_axis_map")}
$if HAS_BIAS:
${layout_declare_ubo(B, "ivec4", "bias_sizes")}
${layout_declare_ubo(B, "ivec4", "bias_axis_map")}
${layout_declare_ubo(B, "float", "alpha", "float", "beta")}

#include "indexing_utils.h"

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int out_packed_dim = C_DIM;
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
const lowp int out_packed_dim = unhash_packed_dim(out_layout);

${layout_declare_spec_const(C, "int", "mat1_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 mat1_axis_map = unhash_axis_map(mat1_layout);

${layout_declare_spec_const(C, "int", "mat2_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 mat2_axis_map = unhash_axis_map(mat2_layout);

$if HAS_BIAS:
${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout);

// To convince the SPIR-V compiler to unroll the loops optimally, need this
// macro
19 changes: 12 additions & 7 deletions backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
@@ -14,26 +14,31 @@

#define op(X, Y, A) ${OPERATOR}

#include "broadcasting_utils.h"
#include "indexing_utils.h"

layout(std430) buffer;

${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec4", "out_sizes")}
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
${layout_declare_ubo(B, "ivec4", "in_sizes")}
${layout_declare_ubo(B, "ivec4", "in_axis_map")}
${layout_declare_ubo(B, "ivec4", "other_sizes")}
${layout_declare_ubo(B, "ivec4", "other_axis_map")}
${layout_declare_ubo(B, "ivec2", "broadcast_params")}
${layout_declare_ubo(B, "float", "alpha")}

#include "broadcasting_utils.h"
#include "indexing_utils.h"

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
const lowp int packed_dim = unhash_packed_dim(out_layout);

${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);

${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 other_axis_map = unhash_axis_map(other_layout);

void main() {
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
@@ -21,12 +21,13 @@ layout(std430) buffer;
${layout_declare_buffer(B, "w", "nchw_out", "int")}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec4", "tensor_sizes")}
${layout_declare_ubo(B, "ivec4", "axis_map")}
${layout_declare_ubo(B, "int", "out_numel")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;
${layout_declare_spec_const(C, "int", "t_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 axis_map = unhash_axis_map(t_layout);
const lowp int packed_dim = unhash_packed_dim(t_layout);

void main() {
const int out_buf_idx = int(gl_GlobalInvocationID.x);
21 changes: 14 additions & 7 deletions backends/vulkan/runtime/graph/ops/glsl/conv1d.glsl
@@ -14,8 +14,6 @@

#define op(X, A, B) ${OPERATOR}

#include "indexing_utils.h"

layout(std430) buffer;

${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
@@ -26,17 +24,26 @@ ${layout_declare_tensor(B, "r", "bias_in", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec3", "out_limits")}
${layout_declare_ubo(B, "ivec4", "in_sizes")}

${layout_declare_ubo(B, "ivec4", "out_axis_map")}
${layout_declare_ubo(B, "ivec4", "in_axis_map")}
${layout_declare_ubo(B, "ivec4", "kernel_axis_map")}
${layout_declare_ubo(B, "ivec4", "bias_axis_map")}

${layout_declare_ubo(B,"int", "kernel_size", "int", "stride", "int", "padding", "int", "dilation", "int", "in_group_size", "int", "out_group_size")}

${layout_declare_ubo(B, "float", "out_min", "float", "out_max")}

#include "indexing_utils.h"

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);

${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);

${layout_declare_spec_const(C, "int", "kernel_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 kernel_axis_map = unhash_axis_map(kernel_layout);

${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout);

// Let us define
//
// input = (N, in_C, in_L),
24 changes: 14 additions & 10 deletions backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl
@@ -14,17 +14,14 @@

layout(std430) buffer;

#include "indexing_utils.h"
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "existing_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}

${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(1, "r", "existing_out", DTYPE, STORAGE)}
${layout_declare_tensor(2, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec4", "out_sizes")}
${layout_declare_ubo(B, "ivec4", "in_sizes")}

${layout_declare_ubo(3, "ivec4", "out_sizes")}
${layout_declare_ubo(4, "ivec4", "out_axis_map")}
${layout_declare_ubo(5, "ivec4", "in_sizes")}
${layout_declare_ubo(6, "ivec4", "in_axis_map")}
layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs {
layout(set = 0, binding = 5) uniform PRECISION restrict CopyArgs {
// Operates on (x, y, z) logical extents.
ivec3 range;
// Analogous to the range variable in copy. It defines the # of channels being
@@ -35,9 +32,16 @@ layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs {
int src_channel_offset;
};

#include "indexing_utils.h"

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
const lowp int packed_dim = unhash_packed_dim(out_layout);

${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);

void main() {
// Note: Unlike other shaders, the range is often not equal to the destination
12 changes: 8 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl
@@ -12,19 +12,23 @@

${define_active_storage_type(STORAGE)}

#include "indexing_utils.h"

layout(std430) buffer;

${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}

${layout_declare_ubo(B, "ivec3", "range", "ivec3", "src_offset", "ivec3", "dst_offset")}
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
${layout_declare_ubo(B, "ivec4", "in_axis_map")}

#include "indexing_utils.h"

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);

${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);

void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);
