Skip to content

[ET-VK] New Implementation of `permute` operator #11825

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: gh/SS-JIA/245/base
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 0 additions & 89 deletions backends/vulkan/runtime/graph/ops/glsl/permute.glsl

This file was deleted.

72 changes: 72 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/permute_buffer.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

// Preamble for the buffer-backed permute shader. Lines of the form ${...} are
// template expressions expanded by the shader code generator before this file
// is compiled as GLSL.

#define PRECISION ${PRECISION}

// Texel (4-wide) and scalar element types derived from the DTYPE template
// parameter.
#define VEC4_T ${texel_type(DTYPE)}
#define T ${buffer_scalar_type(DTYPE)}

${define_active_storage_type("buffer")}
${define_required_extensions(DTYPE)}

layout(std430) buffer;

#include "indexing_utils.h"

// Output (written) and input (read) tensors, both declared with buffer storage.
${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")}
${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")}

// Uniform buffer objects. Only out_strides and out_numel are referenced by the
// functions visible in this file; in_sizes is declared but not used here.
// NOTE(review): presumably in_sizes is kept so the binding layout matches the
// host-side dispatch code -- confirm before removing.
${layout_declare_ubo(B, "ivec4", "in_sizes")}
${layout_declare_ubo(B, "ivec4", "out_strides")}
${layout_declare_ubo(B, "int", "out_numel")}

layout(push_constant) uniform restrict Block {
ivec4 in_strides;
ivec4 permute_dims; // Permutation mapping: permute_dims[i] = j means output dim i comes from input dim j
};

// Layout specialization constants. Only out_layout is consumed below (to
// derive out_dim_order); in_layout is declared but not referenced in this file.
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}

// Dimension order of the output tensor, decoded from the out_layout constant.
const lowp ivec4 out_dim_order = unhash_dim_order(out_layout);

// Local workgroup size is supplied through specialization constants 0, 1, 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Maps an output tensor index to the input tensor index it is sourced from.
// Component i of the output index is written into component permute_dims[i]
// of the returned input index.
ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) {
  ivec4 in_tidx;

  // Scatter every output component into the input slot named by permute_dims.
  for (int dim = 0; dim < 4; ++dim) {
    in_tidx[permute_dims[dim]] = out_tidx[dim];
  }

  return in_tidx;
}

// Entry point: each invocation copies a single element from the input buffer
// to the output buffer, choosing the source element via the permutation.
void main() {
  const int out_bufi = int(gl_GlobalInvocationID.x);

  // Invocations beyond the last output element have nothing to write.
  if (out_bufi >= out_numel) {
    return;
  }

  // out buffer index -> out tensor index -> in tensor index -> in buffer index
  const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);
  const int in_bufi = tidx_to_bufi(out_tidx_to_in_tidx(out_tidx), in_strides);

  t_out[out_bufi] = t_in[in_bufi];
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
permute:
permute_buffer:
parameter_names_with_default_values:
DTYPE: float
NDIM: 3
STORAGE: texture3d
generate_variant_forall:
DTYPE:
- VALUE: half
- VALUE: float
- VALUE: int32
shader_variants:
- NAME: permute
- NAME: permute_buffer
103 changes: 103 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/permute_texture.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

// Preamble for the texture-backed permute shader. Lines of the form ${...} are
// template expressions expanded by the shader code generator before this file
// is compiled as GLSL.

#define PRECISION ${PRECISION}

// Texel (4-wide) and scalar element types derived from the DTYPE template
// parameter.
#define VEC4_T ${texel_type(DTYPE)}
#define T ${buffer_scalar_type(DTYPE)}

${define_active_storage_type("texture3d")}
${define_required_extensions(DTYPE)}

layout(std430) buffer;

#include "indexing_utils.h"

// Output (written) and input (read) tensors, both declared as 3D textures.
${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")}
${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")}

// Per-dispatch parameters passed as push constants.
layout(push_constant) uniform restrict Block {
ivec4 out_sizes;
ivec4 in_sizes;
ivec4 permute_dims; // Permutation mapping: permute_dims[i] = j means output dim i comes from input dim j
};

// Output layout specialization constant and the axis map / packed dimension
// decoded from it.
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
const lowp int out_packed_dim = unhash_packed_dim(out_layout);

// Input layout specialization constant and the axis map / packed dimension
// decoded from it.
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
const lowp int in_packed_dim = unhash_packed_dim(in_layout);

// Local workgroup size is supplied through specialization constants 0, 1, 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Maps an output tensor index to the input tensor index it is sourced from.
// Component i of the output index is written into component permute_dims[i]
// of the returned input index.
ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) {
  ivec4 in_tidx;

  // Scatter every output component into the input slot named by permute_dims.
  for (int dim = 0; dim < 4; ++dim) {
    in_tidx[permute_dims[dim]] = out_tidx[dim];
  }

  return in_tidx;
}

// Reports whether whole texels can be copied from input to output unchanged.
// That is the case when the output's packed dimension is sourced from the
// input's packed dimension, so the four elements of an output texel lie along
// the same dimension as the four elements of an input texel.
bool can_use_fast_path() {
  // Input dimension that feeds the output's packed dimension.
  const int src_dim_of_out_packed = permute_dims[out_packed_dim];
  return src_dim_of_out_packed == in_packed_dim;
}

// Entry point: each invocation produces one output texel.
//
// Fast path: when the packed dimension is preserved by the permutation, the
// matching input texel is copied as-is.
//
// Slow path: otherwise each of the (up to) four elements of the output texel
// is gathered from its own input texel. Elements that fall past the end of the
// output's packed dimension (padding lanes of the last texel) are left at zero
// and no input fetch is issued for them, since the permuted index would point
// outside the logical extent of the input tensor.
void main() {
  const ivec3 lpos = ivec3(gl_GlobalInvocationID);
  ivec4 out_tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, out_packed_dim);

  // Skip invocations whose first element lies outside the output tensor.
  if (any(greaterThanEqual(out_tidx, out_sizes))) {
    return;
  }

  if (can_use_fast_path()) {
    // Fast path: packed dimension is preserved, so we can copy texels directly
    ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
    ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim);
    VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos));

    write_texel_lpos(t_out, lpos, in_texel, out_axis_map);
  }
  else {
    // Slow path: packed dimension is not preserved, so each element of the
    // output texel may be "sourced" from a different texel in the input tensor.
    // Therefore each output texel element is processed individually.
    VEC4_T out_texel = VEC4_T(0);

    for (int texel_i = 0; texel_i < 4; ++texel_i) {
      // Stop at the end of the packed dimension: the remaining lanes are
      // padding and must stay zero. The index only grows, so once it is out
      // of range every later lane is out of range too.
      if (out_tidx[out_packed_dim] >= out_sizes[out_packed_dim]) {
        break;
      }

      ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
      ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim);
      // Lane within the fetched input texel that holds the wanted element.
      int element_idx = in_tidx[in_packed_dim] % 4;

      VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos));
      T selected_value = T(in_texel[element_idx]);

      out_texel[texel_i] = selected_value;

      // Advance to the next element along the output's packed dimension.
      out_tidx[out_packed_dim]++;
    }

    write_texel_lpos(t_out, lpos, out_texel, out_axis_map);
  }
}
10 changes: 10 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/permute_texture.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Codegen configuration for the permute_texture shader template.
# NOTE(review): nesting indentation appears to have been stripped from this
# view; restore it from the repository copy before using this file.
permute_texture:
# Template parameters and their default values.
parameter_names_with_default_values:
DTYPE: float
# Presumably one shader variant is generated per DTYPE value listed below --
# confirm against the shader codegen script.
generate_variant_forall:
DTYPE:
- VALUE: half
- VALUE: float
- VALUE: int32
# Names of the shader variants emitted from this template.
shader_variants:
- NAME: permute_texture3d
Loading
Loading