[mps] Add cshim for torchao mps ops #2502

Open · wants to merge 1 commit into main

11 changes: 11 additions & 0 deletions torchao/experimental/ops/mps/cshim.py
@@ -0,0 +1,11 @@
import torch

# Map from op overload to its C shim declaration(s), used by AOTInductor.
# See TestUIntxWeightOnlyLinearQuantizer.test_export_accuracy for how to use it.
torchao_op_c_shim: dict[torch._ops.OpOverload, list[str]] = {}

# Register a shim declaration for each of the 1- through 7-bit weight variants.
for nbit in range(1, 8):
    op_name = f"_linear_fp_act_{nbit}bit_weight"
    torchao_op_c_shim[getattr(torch.ops.torchao, op_name).default] = [
        f"AOTITorchError aoti_torch_mps_{op_name}(AtenTensorHandle A, AtenTensorHandle B, int64_t group_size, AtenTensorHandle S, AtenTensorHandle Z, AtenTensorHandle* ret)",
    ]
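
A reviewer-side sketch of what this table ends up holding, assuming the torchao MPS op library has been loaded so that torch.ops.torchao resolves (the 3-bit entry is shown; the other bit widths follow the same pattern):

import torch
from torchao.experimental.ops.mps.cshim import torchao_op_c_shim

# Assumes importing torchao has registered the MPS ops with the dispatcher;
# otherwise the getattr lookups in cshim.py raise at import time.
op = torch.ops.torchao._linear_fp_act_3bit_weight.default
print(torchao_op_c_shim[op][0])
# AOTITorchError aoti_torch_mps__linear_fp_act_3bit_weight(AtenTensorHandle A,
#     AtenTensorHandle B, int64_t group_size, AtenTensorHandle S,
#     AtenTensorHandle Z, AtenTensorHandle* ret)

Note the double underscore in the generated symbol: aoti_torch_mps_ is prefixed to the op name, which itself starts with an underscore. This matches the entry points defined in the .mm file below.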
43 changes: 43 additions & 0 deletions torchao/experimental/ops/mps/linear_fp_act_xbit_weight_aten.mm
@@ -5,6 +5,8 @@
// LICENSE file in the root directory of this source tree.

// clang-format off
#include <torch/csrc/inductor/aoti_torch/c/shim.h>
#include <torch/csrc/inductor/aoti_torch/utils.h>
#include <torch/library.h>
#include <ATen/native/mps/OperationUtils.h>
#include <torchao/experimental/kernels/mps/src/lowbit.h>
@@ -239,3 +241,44 @@ Tensor pack_weights_cpu_kernel(const Tensor& W) {
}

} // namespace torchao::kernels::mps::lowbit::aten


// C shim wrappers for AOTInductor.
// Each invocation of the macro below defines an extern "C" entry point whose
// signature matches the declaration registered in cshim.py; the wrapper looks
// the op up in the dispatcher and forwards the call.
// See TestUIntxWeightOnlyLinearQuantizer.test_export_accuracy for how to use it.
#define DECLARE_LINEAR_FP_ACT_WEIGHT_FUNCTION(BITS)                                  \
  extern "C" {                                                                       \
  AOTI_TORCH_EXPORT AOTITorchError aoti_torch_mps__linear_fp_act_##BITS##bit_weight( \
      AtenTensorHandle A,                                                            \
      AtenTensorHandle B,                                                            \
      int64_t group_size,                                                            \
      AtenTensorHandle S,                                                            \
      AtenTensorHandle Z,                                                            \
      AtenTensorHandle* ret) {                                                       \
    AOTI_TORCH_CONVERT_EXCEPTION_TO_ERROR_CODE({                                     \
      auto op_handle =                                                               \
          c10::Dispatcher::singleton()                                               \
              .findSchemaOrThrow("torchao::_linear_fp_act_" #BITS "bit_weight", "")  \
              .typed<at::Tensor(                                                     \
                  const at::Tensor& A,                                               \
                  const at::Tensor& B,                                               \
                  int64_t group_size,                                                \
                  const at::Tensor& S,                                               \
                  const at::Tensor& Z)>();                                           \
      auto tmp_result = op_handle.call(                                              \
          torch::aot_inductor::resolve_tensor_dispatch_flags(A),                     \
          torch::aot_inductor::resolve_tensor_dispatch_flags(B),                     \
          group_size,                                                                \
          torch::aot_inductor::resolve_tensor_dispatch_flags(S),                     \
          torch::aot_inductor::resolve_tensor_dispatch_flags(Z));                    \
      *ret = torch::aot_inductor::new_tensor_handle(std::move(tmp_result));          \
    });                                                                              \
  }                                                                                  \
  }

DECLARE_LINEAR_FP_ACT_WEIGHT_FUNCTION(1)
DECLARE_LINEAR_FP_ACT_WEIGHT_FUNCTION(2)
DECLARE_LINEAR_FP_ACT_WEIGHT_FUNCTION(3)
DECLARE_LINEAR_FP_ACT_WEIGHT_FUNCTION(4)
DECLARE_LINEAR_FP_ACT_WEIGHT_FUNCTION(5)
DECLARE_LINEAR_FP_ACT_WEIGHT_FUNCTION(6)
DECLARE_LINEAR_FP_ACT_WEIGHT_FUNCTION(7)
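
For context, a hedged sketch of how these shims get exercised end to end: the table in cshim.py tells AOTInductor that a C shim exists for each op, so the compiled artifact can call the extern "C" entry points above directly instead of routing through the proxy executor. The model below is illustrative only; the quantization step that rewrites nn.Linear into a torch.ops.torchao._linear_fp_act_<n>bit_weight call is elided (see TestUIntxWeightOnlyLinearQuantizer.test_export_accuracy for the real flow).

import torch

class Tiny(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(64, 64)

    def forward(self, x):
        return self.linear(x)

# Illustrative: in the referenced test, the linear is quantized to a torchao
# low-bit op before export, so the exported graph actually contains the
# shimmed ops. Requires a machine with an MPS device.
model = Tiny().to("mps").eval()
x = torch.randn(1, 64, device="mps")
ep = torch.export.export(model, (x,))
pkg = torch._inductor.aoti_compile_and_package(ep)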