diff --git a/CMakeLists.txt b/CMakeLists.txt
index 01ad728c425..2a53e73c57c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,7 @@
 
 cmake_minimum_required(VERSION 3.19)
 project(executorch)
+include(build/Codegen.cmake)
 include(build/Utils.cmake)
 include(CMakeDependentOption)
 
@@ -401,6 +402,7 @@ if(NOT EXECUTORCH_SRCS_FILE)
   message(STATUS "executorch: Generating source lists")
   set(EXECUTORCH_SRCS_FILE "${CMAKE_CURRENT_BINARY_DIR}/executorch_srcs.cmake")
   extract_sources(${EXECUTORCH_SRCS_FILE})
+  validate_build_variables()
 endif()
 
 # This file defines the `_<target>__srcs` variables used below.
diff --git a/build/Codegen.cmake b/build/Codegen.cmake
index 9ccb7ec442b..11e1c730aa1 100644
--- a/build/Codegen.cmake
+++ b/build/Codegen.cmake
@@ -9,7 +9,7 @@
 
 # Selective build. See codegen/tools/gen_oplist.py for how to use these
 # arguments.
-include(${EXECUTORCH_ROOT}/build/Utils.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/Utils.cmake)
 
 function(gen_selected_ops)
   set(arg_names LIB_NAME OPS_SCHEMA_YAML ROOT_OPS INCLUDE_ALL_OPS)
@@ -97,7 +97,7 @@ function(generate_bindings_for_kernels)
       --tags-path=${site-packages-out}/torchgen/packaged/ATen/native/tags.yaml
       --aten-yaml-path=${site-packages-out}/torchgen/packaged/ATen/native/native_functions.yaml
       --op-selection-yaml-path=${_oplist_yaml}
-    )
+  )
   if(GEN_ADD_EXCEPTION_BOUNDARY)
     set(_gen_command "${_gen_command}" --add-exception-boundary)
   endif()
@@ -217,3 +217,127 @@ function(merge_yaml)
     WORKING_DIRECTORY ${EXECUTORCH_ROOT}
   )
 endfunction()
+
+# Append the file list named `name` (defined in build_variables.bzl under
+# shim_et/xplat/executorch/build) to `outputvar` in the caller's scope.
+function(append_filelist name outputvar)
+  # configure_file adds its input to the list of CMAKE_RERUN dependencies
+  configure_file(
+    ${PROJECT_SOURCE_DIR}/shim_et/xplat/executorch/build/build_variables.bzl
+    ${PROJECT_BINARY_DIR}/build_variables.bzl COPYONLY
+  )
+  # Callers are not required to have located Python before calling us.
+  if(NOT PYTHON_EXECUTABLE)
+    resolve_python_executable()
+  endif()
+  execute_process(
+    COMMAND
+      "${PYTHON_EXECUTABLE}" -c
+      "exec(open('${PROJECT_SOURCE_DIR}/shim_et/xplat/executorch/build/build_variables.bzl').read());print(';'.join(${name}))"
+    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+    RESULT_VARIABLE _retval
+    OUTPUT_VARIABLE _tempvar
+    ERROR_VARIABLE _stderr
+  )
+  if(NOT _retval EQUAL 0)
+    message(FATAL_ERROR
+        "Failed to fetch filelist ${name} from build_variables.bzl with output ${_tempvar} and stderr ${_stderr}"
+    )
+  endif()
+  string(REPLACE "\n" "" _tempvar "${_tempvar}")
+  list(APPEND ${outputvar} ${_tempvar})
+  set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
+endfunction()
+
+# Fail the build if the src lists in build_variables.bzl do not match the src
+# lists extracted from Buck and placed into EXECUTORCH_SRCS_FILE. This is
+# intended to be a safety mechanism while we are in the process of removing Buck
+# from the CMake build and replacing it with build_variables.bzl; if you are
+# seeing failures after you have intentionally changed Buck srcs, then simply
+# update build_variables.bzl. If you are seeing failures after changing
+# something about the build system, make sure your changes will work both before
+# and after we finish replacing Buck with build_variables.bzl, which should
+# involve getting these lists to match!
+function(validate_build_variables)
+  include(${EXECUTORCH_SRCS_FILE})
+  set(BUILD_VARIABLES_FILELISTS
+      EXECUTORCH_SRCS
+      EXECUTORCH_CORE_SRCS
+      PORTABLE_KERNELS_SRCS
+      OPTIMIZED_KERNELS_SRCS
+      QUANTIZED_KERNELS_SRCS
+      PROGRAM_SCHEMA_SRCS
+      OPTIMIZED_CPUBLAS_SRCS
+      OPTIMIZED_NATIVE_CPU_OPS_SRCS
+      EXTENSION_DATA_LOADER_SRCS
+      EXTENSION_MODULE_SRCS
+      EXTENSION_RUNNER_UTIL_SRCS
+      EXTENSION_LLM_RUNNER_SRCS
+      EXTENSION_TENSOR_SRCS
+      EXTENSION_THREADPOOL_SRCS
+      EXTENSION_TRAINING_SRCS
+      TRAIN_XOR_SRCS
+      EXECUTOR_RUNNER_SRCS
+      SIZE_TEST_SRCS
+      MPS_EXECUTOR_RUNNER_SRCS
+      MPS_BACKEND_SRCS
+      MPS_SCHEMA_SRCS
+      XNN_EXECUTOR_RUNNER_SRCS
+      XNNPACK_BACKEND_SRCS
+      XNNPACK_SCHEMA_SRCS
+      VULKAN_SCHEMA_SRCS
+      CUSTOM_OPS_SRCS
+      LLAMA_RUNNER_SRCS
+  )
+  set(BUILD_VARIABLES_VARNAMES
+      _executorch__srcs
+      _executorch_core__srcs
+      _portable_kernels__srcs
+      _optimized_kernels__srcs
+      _quantized_kernels__srcs
+      _program_schema__srcs
+      _optimized_cpublas__srcs
+      _optimized_native_cpu_ops__srcs
+      _extension_data_loader__srcs
+      _extension_module__srcs
+      _extension_runner_util__srcs
+      _extension_llm_runner__srcs
+      _extension_tensor__srcs
+      _extension_threadpool__srcs
+      _extension_training__srcs
+      _train_xor__srcs
+      _executor_runner__srcs
+      _size_test__srcs
+      _mps_executor_runner__srcs
+      _mps_backend__srcs
+      _mps_schema__srcs
+      _xnn_executor_runner__srcs
+      _xnnpack_backend__srcs
+      _xnnpack_schema__srcs
+      _vulkan_schema__srcs
+      _custom_ops__srcs
+      _llama_runner__srcs
+  )
+  foreach(filelist_and_varname IN ZIP_LISTS BUILD_VARIABLES_FILELISTS
+                                  BUILD_VARIABLES_VARNAMES
+  )
+    # custom_ops is skipped (its FFHT .c sources vary by CPU -- TODO confirm).
+    if("${filelist_and_varname_1}" STREQUAL "_custom_ops__srcs")
+      continue()
+    endif()
+    append_filelist(
+      ${filelist_and_varname_0} "${filelist_and_varname_1}_from_build_variables"
+    )
+    # Expand both lists explicitly so an undefined one compares as empty.
+    if(NOT "${${filelist_and_varname_1}}" STREQUAL
+       "${${filelist_and_varname_1}_from_build_variables}"
+    )
+      message(FATAL_ERROR
+          "Buck-generated ${filelist_and_varname_1} does not match hardcoded "
+          "${filelist_and_varname_0} in build_variables.bzl. Left: "
+          "${${filelist_and_varname_1}}\n "
+          "Right: ${${filelist_and_varname_1}_from_build_variables}"
+      )
+    endif()
+  endforeach()
+endfunction()
diff --git a/build/Test.cmake b/build/Test.cmake
index 31e5aaf4d63..dbe590d610b 100644
--- a/build/Test.cmake
+++ b/build/Test.cmake
@@ -36,6 +36,7 @@ function(et_cxx_test target_name)
   cmake_parse_arguments(ET_CXX_TEST "" "" "${multi_arg_names}" ${ARGN})
 
   add_executable(${target_name} ${ET_CXX_TEST_SOURCES} ${EXECUTORCH_ROOT}/runtime/core/exec_aten/testing_util/tensor_util.cpp)
+  find_package(GTest)
   # Includes gtest, gmock, executorch by default
   target_link_libraries(
     ${target_name} GTest::gtest GTest::gtest_main GTest::gmock executorch
diff --git a/build/Utils.cmake b/build/Utils.cmake
index 646ef5ff285..6e7459430b7 100644
--- a/build/Utils.cmake
+++ b/build/Utils.cmake
@@ -329,29 +329,25 @@ function(resolve_python_executable)
 endfunction()
 
 # find_package(Torch CONFIG REQUIRED) replacement for targets that have a
-# header-only Torch dependency. Because find_package sets variables in the
-# parent scope, we use a macro to preserve this rather than maintaining our own
-# list of those variables.
-macro(find_package_torch_headers)
-  # We cannot simply use CMAKE_FIND_ROOT_PATH_BOTH, because that does not
-  # propagate into TorchConfig.cmake.
-  foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE)
-    set(OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}
-        ${CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}}
-    )
-    set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} BOTH)
-  endforeach()
-  find_package_torch()
-  foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE)
-    set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}
-        ${OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}}
-    )
-  endforeach()
-endmacro()
+# header-only Torch dependency.
+#
+# Unlike find_package(Torch ...), this will only set
+# TORCH_INCLUDE_DIRS in the parent scope. In particular, it will NOT
+# set any of the following:
+# - TORCH_FOUND
+# - TORCH_LIBRARY
+# - TORCH_CXX_FLAGS
+function(find_package_torch_headers)
+  # Derive the include directories straight from the installed torch package
+  # instead of find_package, so cross-compiles can reuse the host's headers.
+  get_torch_base_path(_torch_root)
+  set(_torch_api_include "${_torch_root}/include/torch/csrc/api/include")
+  set(TORCH_INCLUDE_DIRS "${_torch_root}/include;${_torch_api_include}" PARENT_SCOPE)
+endfunction()
 
-# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package
-# can find Torch.
-function(add_torch_to_cmake_prefix_path)
+# Return the base path to the installed Torch Python library in
+# outVar.
+function(get_torch_base_path outVar)
   if(NOT PYTHON_EXECUTABLE)
     resolve_python_executable()
   endif()
@@ -370,6 +366,13 @@ function(add_torch_to_cmake_prefix_path)
     message("Output:\n${_tmp_torch_path}")
     message(FATAL_ERROR "Error:\n${_tmp_torch_path_error}")
   endif()
+  set(${outVar} ${_tmp_torch_path} PARENT_SCOPE)
+endfunction()
+
+# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package
+# can find Torch.
+function(add_torch_to_cmake_prefix_path)
+  get_torch_base_path(_tmp_torch_path)
   list(APPEND CMAKE_PREFIX_PATH "${_tmp_torch_path}")
   set(CMAKE_PREFIX_PATH
       "${CMAKE_PREFIX_PATH}"
diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh
index 4119bde4c7e..cb2d47fdeb3 100644
--- a/build/build_android_llm_demo.sh
+++ b/build/build_android_llm_demo.sh
@@ -44,6 +44,7 @@ build_android_native_library() {
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
     -DANDROID_ABI="${ANDROID_ABI}" \
     -DANDROID_PLATFORM=android-26 \
+    -DBUILD_TESTING=OFF \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_LOG_LEVEL=Info \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -73,6 +74,7 @@ build_android_native_library() {
     -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI="${ANDROID_ABI}" \
     -DANDROID_PLATFORM=android-26 \
+    -DBUILD_TESTING=OFF \
     -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_LOG_LEVEL=Info \
diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml
index 21a8e282929..4b22a09cb5b 100644
--- a/build/cmake_deps.toml
+++ b/build/cmake_deps.toml
@@ -386,10 +386,7 @@ buck_targets = [
   "//extension/llm/custom_ops:custom_ops",
 ]
 filters = [
-  # Second clause is to pick up fht_neon.c/fht_avx.c from FFHT. TODO:
-  # remove filters and patch extract_sources.py's Buck query to fetch
-  # srcs; presumably filters is here to remove .h files.
-  "(.cpp$)|(fht.*\\.c$)",
+  ".cpp$",
 ]
 excludes = [
   "^codegen",
diff --git a/extension/android_test/setup.sh b/extension/android_test/setup.sh
index 725728b8092..c21d2c09623 100755
--- a/extension/android_test/setup.sh
+++ b/extension/android_test/setup.sh
@@ -21,6 +21,7 @@ build_native_library() {
   cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
     -DANDROID_ABI="${ANDROID_ABI}" \
+    -DBUILD_TESTING=OFF \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -36,6 +37,7 @@ build_native_library() {
   cmake extension/android \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI="${ANDROID_ABI}" \
+    -DBUILD_TESTING=OFF \
     -DCMAKE_INSTALL_PREFIX=c"${CMAKE_OUT}" \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_LLAMA_JNI=ON \
diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt
index 95f4bc559fa..c3969e6f9bf 100644
--- a/extension/llm/custom_ops/CMakeLists.txt
+++ b/extension/llm/custom_ops/CMakeLists.txt
@@ -45,6 +45,22 @@ list(APPEND custom_ops_libs cpuinfo)
 list(APPEND custom_ops_libs cpublas)
 list(APPEND custom_ops_libs eigen_blas)
 
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv7)$")
+  list(APPEND _custom_ops__srcs
+       "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_neon.c"
+  )
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  list(APPEND _custom_ops__srcs
+       "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_avx.c"
+  )
+else()
+  message(
+    FATAL_ERROR
+      "Unsupported CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}. (If \
+32-bit x86, try using fht_avx.c and send a PR if it works!)"
+  )
+endif()
+
 list(TRANSFORM _custom_ops__srcs PREPEND "${EXECUTORCH_ROOT}/")
 
 if(NOT EXECUTORCH_BUILD_XNNPACK)
@@ -121,3 +137,8 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
           LIBRARY DESTINATION executorch/extension/llm/custom_ops
   )
 endif()
+
+add_subdirectory(spinquant/third-party/FFHT)
+if(BUILD_TESTING)
+  add_subdirectory(spinquant/test)
+endif()
diff --git a/extension/llm/custom_ops/spinquant/test/CMakeLists.txt b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt
new file mode 100644
index 00000000000..c793d2ed975
--- /dev/null
+++ b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# @generated by test/utils/generate_gtest_cmakelists.py
+#
+# This file should be formatted with
+# ~~~
+# cmake-format -i CMakeLists.txt
+# ~~~
+# It should also be cmake-lint clean.
+#
+
+cmake_minimum_required(VERSION 3.19)
+
+set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..)
+
+include(${EXECUTORCH_ROOT}/build/Test.cmake)
+
+set(_test_srcs
+    fast_hadamard_transform_test.cpp fast_hadamard_transform_test_impl.cpp
+    op_fast_hadamard_transform_test.cpp
+)
+
+et_cxx_test(
+  extension_llm_custom_ops_spinquant_test SOURCES ${_test_srcs} EXTRA_LIBS
+  custom_ops dumb_fht
+)
diff --git a/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt b/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt
new file mode 100644
index 00000000000..2e3089be72e
--- /dev/null
+++ b/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Please keep this file formatted by running:
+# ~~~
+# cmake-format -i CMakeLists.txt
+# ~~~
+
+add_library(dumb_fht dumb_fht.c)
diff --git a/shim_et/xplat/executorch/build/build_variables.bzl b/shim_et/xplat/executorch/build/build_variables.bzl
new file mode 100644
index 00000000000..8f89e6b5267
--- /dev/null
+++ b/shim_et/xplat/executorch/build/build_variables.bzl
@@ -0,0 +1,497 @@
+# WARNING: the contents of this file must BOTH be valid Starlark (for Buck) as well as
+# valid Python (for our cmake build). This means that load() directives are not allowed
+# (as they are not recognized by Python). If you want to fix this, figure out how to run
+# this file from cmake with a proper Starlark interpreter as part of the default OSS
+# build process. If you need some nontrivial Starlark features, make a separate bzl
+# file. (Remember that bzl files are not exported via ShipIt by default, so you may also
+# need to update ExecuTorch's ShipIt config.)
+
+# This file contains srcs lists that are shared between our Buck and CMake build
+# systems. We had three choices for listing src files:
+# 1) List them in Buck and use buck query to get them in CMake. This was our setup for a
+# long time; the problem is that OSS users would prefer not to have to deal with Buck at
+# all.
+# 2) List them in both Buck targets.bzl files and CMake's CMakeLists.txt files. This is
+# unnecessary duplication, and people will invariably forget to update one or the other.
+# 3) List them somewhere CMake and Buck can both get at them; that's this file. Buck
+# files can load() it, and our CMake build evaluates it with Python. (See
+# append_filelist in build/Codegen.cmake.)
+#
+# Inconveniently, the Buck target layout is much more granular than the CMake library
+# layout, leading to several complications:
+# 1) Single-file Buck targets will just list the one src file they contain. Nothing to
+# share with CMake in that case, and that src will be in a list in this file that does
+# not map directly to that particular Buck target.
+# 2) Multi-file Buck targets should have a list below that corresponds exactly to their
+# `srcs`. There should then be simple Python code that combines those lists into lists
+# that map 1:1 to the CMake library layout.
+
+EXECUTORCH_SRCS = [
+    "kernels/prim_ops/et_copy_index.cpp",
+    "kernels/prim_ops/et_view.cpp",
+    "kernels/prim_ops/register_prim_ops.cpp",
+]
+
+EXECUTORCH_CORE_SRCS = [
+    "runtime/backend/interface.cpp",
+    "runtime/core/evalue.cpp",
+    "runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp",
+    "runtime/core/exec_aten/util/tensor_util_portable.cpp",
+    "runtime/core/portable_type/tensor_impl.cpp",
+    "runtime/core/tag.cpp",
+    "runtime/core/tensor_layout.cpp",
+    "runtime/executor/method.cpp",
+    "runtime/executor/method_meta.cpp",
+    "runtime/executor/program.cpp",
+    "runtime/executor/tensor_parser_exec_aten.cpp",
+    "runtime/executor/tensor_parser_portable.cpp",
+    "runtime/kernel/operator_registry.cpp",
+    "runtime/platform/abort.cpp",
+    "runtime/platform/default/posix.cpp",
+    "runtime/platform/log.cpp",
+    "runtime/platform/profiler.cpp",
+    "runtime/platform/runtime.cpp",
+    "schema/extended_header.cpp",
+]
+
+PORTABLE_KERNELS_SRCS = [
+    "kernels/portable/cpu/op__empty_dim_order.cpp",
+    "kernels/portable/cpu/op__to_dim_order_copy.cpp",
+    "kernels/portable/cpu/op_abs.cpp",
+    "kernels/portable/cpu/op_acos.cpp",
+    "kernels/portable/cpu/op_acosh.cpp",
+    "kernels/portable/cpu/op_add.cpp",
+    "kernels/portable/cpu/op_addmm.cpp",
+    "kernels/portable/cpu/op_alias_copy.cpp",
+    "kernels/portable/cpu/op_allclose.cpp",
+    "kernels/portable/cpu/op_amax.cpp",
+    "kernels/portable/cpu/op_amin.cpp",
+    "kernels/portable/cpu/op_any.cpp",
+    "kernels/portable/cpu/op_arange.cpp",
+    "kernels/portable/cpu/op_argmax.cpp",
+    "kernels/portable/cpu/op_argmin.cpp",
+    "kernels/portable/cpu/op_as_strided_copy.cpp",
+    "kernels/portable/cpu/op_asin.cpp",
+    "kernels/portable/cpu/op_asinh.cpp",
+    "kernels/portable/cpu/op_atan.cpp",
+    "kernels/portable/cpu/op_atan2.cpp",
+    "kernels/portable/cpu/op_atanh.cpp",
+    "kernels/portable/cpu/op_avg_pool2d.cpp",
+    "kernels/portable/cpu/op_bitwise_and.cpp",
+    "kernels/portable/cpu/op_bitwise_not.cpp",
+    "kernels/portable/cpu/op_bitwise_or.cpp",
+    "kernels/portable/cpu/op_bitwise_xor.cpp",
+    "kernels/portable/cpu/op_bmm.cpp",
+    "kernels/portable/cpu/op_cat.cpp",
+    "kernels/portable/cpu/op_cdist_forward.cpp",
+    "kernels/portable/cpu/op_ceil.cpp",
+    "kernels/portable/cpu/op_clamp.cpp",
+    "kernels/portable/cpu/op_clone.cpp",
+    "kernels/portable/cpu/op_constant_pad_nd.cpp",
+    "kernels/portable/cpu/op_convolution.cpp",
+    "kernels/portable/cpu/op_convolution_backward.cpp",
+    "kernels/portable/cpu/op_copy.cpp",
+    "kernels/portable/cpu/op_cos.cpp",
+    "kernels/portable/cpu/op_cosh.cpp",
+    "kernels/portable/cpu/op_cumsum.cpp",
+    "kernels/portable/cpu/op_detach_copy.cpp",
+    "kernels/portable/cpu/op_diagonal_copy.cpp",
+    "kernels/portable/cpu/op_div.cpp",
+    "kernels/portable/cpu/op_embedding.cpp",
+    "kernels/portable/cpu/op_empty.cpp",
+    "kernels/portable/cpu/op_eq.cpp",
+    "kernels/portable/cpu/op_erf.cpp",
+    "kernels/portable/cpu/op_exp.cpp",
+    "kernels/portable/cpu/op_expand_copy.cpp",
+    "kernels/portable/cpu/op_expm1.cpp",
+    "kernels/portable/cpu/op_fill.cpp",
+    "kernels/portable/cpu/op_flip.cpp",
+    "kernels/portable/cpu/op_floor.cpp",
+    "kernels/portable/cpu/op_floor_divide.cpp",
+    "kernels/portable/cpu/op_fmod.cpp",
+    "kernels/portable/cpu/op_full.cpp",
+    "kernels/portable/cpu/op_full_like.cpp",
+    "kernels/portable/cpu/op_gather.cpp",
+    "kernels/portable/cpu/op_ge.cpp",
+    "kernels/portable/cpu/op_gelu.cpp",
+    "kernels/portable/cpu/op_glu.cpp",
+    "kernels/portable/cpu/op_gt.cpp",
+    "kernels/portable/cpu/op_hardtanh.cpp",
+    "kernels/portable/cpu/op_index.cpp",
+    "kernels/portable/cpu/op_index_put.cpp",
+    "kernels/portable/cpu/op_index_select.cpp",
+    "kernels/portable/cpu/op_isinf.cpp",
+    "kernels/portable/cpu/op_isnan.cpp",
+    "kernels/portable/cpu/op_le.cpp",
+    "kernels/portable/cpu/op_leaky_relu.cpp",
+    "kernels/portable/cpu/op_lift_fresh_copy.cpp",
+    "kernels/portable/cpu/op_linear_scratch_example.cpp",
+    "kernels/portable/cpu/op_log.cpp",
+    "kernels/portable/cpu/op_log10.cpp",
+    "kernels/portable/cpu/op_log1p.cpp",
+    "kernels/portable/cpu/op_log2.cpp",
+    "kernels/portable/cpu/op_log_softmax.cpp",
+    "kernels/portable/cpu/op_logical_and.cpp",
+    "kernels/portable/cpu/op_logical_not.cpp",
+    "kernels/portable/cpu/op_logical_or.cpp",
+    "kernels/portable/cpu/op_logical_xor.cpp",
+    "kernels/portable/cpu/op_logit.cpp",
+    "kernels/portable/cpu/op_lt.cpp",
+    "kernels/portable/cpu/op_masked_fill.cpp",
+    "kernels/portable/cpu/op_masked_scatter.cpp",
+    "kernels/portable/cpu/op_masked_select.cpp",
+    "kernels/portable/cpu/op_max.cpp",
+    "kernels/portable/cpu/op_max_pool2d_with_indices.cpp",
+    "kernels/portable/cpu/op_maximum.cpp",
+    "kernels/portable/cpu/op_mean.cpp",
+    "kernels/portable/cpu/op_min.cpp",
+    "kernels/portable/cpu/op_minimum.cpp",
+    "kernels/portable/cpu/op_mm.cpp",
+    "kernels/portable/cpu/op_mul.cpp",
+    "kernels/portable/cpu/op_narrow_copy.cpp",
+    "kernels/portable/cpu/op_native_batch_norm.cpp",
+    "kernels/portable/cpu/op_native_group_norm.cpp",
+    "kernels/portable/cpu/op_native_layer_norm.cpp",
+    "kernels/portable/cpu/op_ne.cpp",
+    "kernels/portable/cpu/op_neg.cpp",
+    "kernels/portable/cpu/op_nonzero.cpp",
+    "kernels/portable/cpu/op_ones.cpp",
+    "kernels/portable/cpu/op_pdist_forward.cpp",
+    "kernels/portable/cpu/op_permute_copy.cpp",
+    "kernels/portable/cpu/op_pixel_shuffle.cpp",
+    "kernels/portable/cpu/op_pixel_unshuffle.cpp",
+    "kernels/portable/cpu/op_pow.cpp",
+    "kernels/portable/cpu/op_prod.cpp",
+    "kernels/portable/cpu/op_reciprocal.cpp",
+    "kernels/portable/cpu/op_reflection_pad1d.cpp",
+    "kernels/portable/cpu/op_reflection_pad2d.cpp",
+    "kernels/portable/cpu/op_reflection_pad3d.cpp",
+    "kernels/portable/cpu/op_relu.cpp",
+    "kernels/portable/cpu/op_remainder.cpp",
+    "kernels/portable/cpu/op_repeat.cpp",
+    "kernels/portable/cpu/op_repeat_interleave.cpp",
+    "kernels/portable/cpu/op_replication_pad1d.cpp",
+    "kernels/portable/cpu/op_replication_pad2d.cpp",
+    "kernels/portable/cpu/op_replication_pad3d.cpp",
+    "kernels/portable/cpu/op_roll.cpp",
+    "kernels/portable/cpu/op_round.cpp",
+    "kernels/portable/cpu/op_rsqrt.cpp",
+    "kernels/portable/cpu/op_rsub.cpp",
+    "kernels/portable/cpu/op_scalar_tensor.cpp",
+    "kernels/portable/cpu/op_scatter.cpp",
+    "kernels/portable/cpu/op_scatter_add.cpp",
+    "kernels/portable/cpu/op_select_copy.cpp",
+    "kernels/portable/cpu/op_select_scatter.cpp",
+    "kernels/portable/cpu/op_sigmoid.cpp",
+    "kernels/portable/cpu/op_sign.cpp",
+    "kernels/portable/cpu/op_sin.cpp",
+    "kernels/portable/cpu/op_sinh.cpp",
+    "kernels/portable/cpu/op_slice_copy.cpp",
+    "kernels/portable/cpu/op_slice_scatter.cpp",
+    "kernels/portable/cpu/op_softmax.cpp",
+    "kernels/portable/cpu/op_split_copy.cpp",
+    "kernels/portable/cpu/op_split_with_sizes_copy.cpp",
+    "kernels/portable/cpu/op_sqrt.cpp",
+    "kernels/portable/cpu/op_squeeze_copy.cpp",
+    "kernels/portable/cpu/op_stack.cpp",
+    "kernels/portable/cpu/op_sub.cpp",
+    "kernels/portable/cpu/op_sum.cpp",
+    "kernels/portable/cpu/op_t_copy.cpp",
+    "kernels/portable/cpu/op_tan.cpp",
+    "kernels/portable/cpu/op_tanh.cpp",
+    "kernels/portable/cpu/op_to_copy.cpp",
+    "kernels/portable/cpu/op_topk.cpp",
+    "kernels/portable/cpu/op_transpose_copy.cpp",
+    "kernels/portable/cpu/op_tril.cpp",
+    "kernels/portable/cpu/op_trunc.cpp",
+    "kernels/portable/cpu/op_unbind_copy.cpp",
+    "kernels/portable/cpu/op_unsqueeze_copy.cpp",
+    "kernels/portable/cpu/op_upsample_bilinear2d.cpp",
+    "kernels/portable/cpu/op_upsample_nearest2d.cpp",
+    "kernels/portable/cpu/op_var.cpp",
+    "kernels/portable/cpu/op_view_copy.cpp",
+    "kernels/portable/cpu/op_where.cpp",
+    "kernels/portable/cpu/op_zeros.cpp",
+    "kernels/portable/cpu/pattern/unary_ufunc_realh.cpp",
+    "kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp",
+    "kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp",
+    "kernels/portable/cpu/util/activation_ops_util.cpp",
+    "kernels/portable/cpu/util/advanced_index_util.cpp",
+    "kernels/portable/cpu/util/broadcast_util.cpp",
+    "kernels/portable/cpu/util/copy_ops_util.cpp",
+    "kernels/portable/cpu/util/distance_util.cpp",
+    "kernels/portable/cpu/util/dtype_util.cpp",
+    "kernels/portable/cpu/util/index_util.cpp",
+    "kernels/portable/cpu/util/kernel_ops_util.cpp",
+    "kernels/portable/cpu/util/matmul_ops_util.cpp",
+    "kernels/portable/cpu/util/normalization_ops_util.cpp",
+    "kernels/portable/cpu/util/padding_util.cpp",
+    "kernels/portable/cpu/util/reduce_util.cpp",
+    "kernels/portable/cpu/util/repeat_util.cpp",
+    "kernels/portable/cpu/util/select_copy_util.cpp",
+    "kernels/portable/cpu/util/slice_util.cpp",
+    "kernels/portable/cpu/util/upsample_util.cpp",
+]
+
+OPTIMIZED_KERNELS_SRCS = [
+    "extension/parallel/thread_parallel.cpp",
+    "kernels/optimized/blas/BlasKernel.cpp",
+    "kernels/optimized/blas/CPUBlas.cpp",
+    "kernels/optimized/cpu/op_add.cpp",
+    "kernels/optimized/cpu/op_bmm.cpp",
+    "kernels/optimized/cpu/op_div.cpp",
+    "kernels/optimized/cpu/op_exp.cpp",
+    "kernels/optimized/cpu/op_fft_r2c.cpp",
+    "kernels/optimized/cpu/op_gelu.cpp",
+    "kernels/optimized/cpu/op_le.cpp",
+    "kernels/optimized/cpu/op_linear.cpp",
+    "kernels/optimized/cpu/op_log_softmax.cpp",
+    "kernels/optimized/cpu/op_mm.cpp",
+    "kernels/optimized/cpu/op_mul.cpp",
+    "kernels/optimized/cpu/op_native_layer_norm.cpp",
+    "kernels/optimized/cpu/op_neg.cpp",
+    "kernels/optimized/cpu/op_sigmoid.cpp",
+    "kernels/optimized/cpu/op_sub.cpp",
+]
+
+QUANTIZED_KERNELS_SRCS = [
+    "kernels/quantized/cpu/embeddingxb.cpp",
+    "kernels/quantized/cpu/op_add.cpp",
+    "kernels/quantized/cpu/op_choose_qparams.cpp",
+    "kernels/quantized/cpu/op_dequantize.cpp",
+    "kernels/quantized/cpu/op_embedding.cpp",
+    "kernels/quantized/cpu/op_embedding2b.cpp",
+    "kernels/quantized/cpu/op_embedding4b.cpp",
+    "kernels/quantized/cpu/op_mixed_linear.cpp",
+    "kernels/quantized/cpu/op_mixed_mm.cpp",
+    "kernels/quantized/cpu/op_quantize.cpp",
+]
+
+PROGRAM_SCHEMA_SRCS = [
+    "schema/program.fbs",
+    "schema/scalar_type.fbs",
+]
+
+OPTIMIZED_CPUBLAS_SRCS = [
+    "extension/parallel/thread_parallel.cpp",
+    "extension/threadpool/threadpool.cpp",
+    "extension/threadpool/threadpool_guard.cpp",
+    "kernels/optimized/blas/BlasKernel.cpp",
+    "kernels/optimized/blas/CPUBlas.cpp",
+]
+
+OPTIMIZED_NATIVE_CPU_OPS_SRCS = [
+    "codegen/templates/RegisterCodegenUnboxedKernels.cpp",
+    "codegen/templates/RegisterDispatchKeyCustomOps.cpp",
+    "codegen/templates/RegisterKernels.cpp",
+    "codegen/templates/RegisterSchema.cpp",
+    "extension/parallel/thread_parallel.cpp",
+    "extension/threadpool/threadpool.cpp",
+    "extension/threadpool/threadpool_guard.cpp",
+    "kernels/optimized/blas/BlasKernel.cpp",
+    "kernels/optimized/blas/CPUBlas.cpp",
+    "kernels/optimized/cpu/op_add.cpp",
+    "kernels/optimized/cpu/op_bmm.cpp",
+    "kernels/optimized/cpu/op_div.cpp",
+    "kernels/optimized/cpu/op_exp.cpp",
+    "kernels/optimized/cpu/op_fft_r2c.cpp",
+    "kernels/optimized/cpu/op_gelu.cpp",
+    "kernels/optimized/cpu/op_le.cpp",
+    "kernels/optimized/cpu/op_linear.cpp",
+    "kernels/optimized/cpu/op_log_softmax.cpp",
+    "kernels/optimized/cpu/op_mm.cpp",
+    "kernels/optimized/cpu/op_mul.cpp",
+    "kernels/optimized/cpu/op_native_layer_norm.cpp",
+    "kernels/optimized/cpu/op_neg.cpp",
+    "kernels/optimized/cpu/op_sigmoid.cpp",
+    "kernels/optimized/cpu/op_sub.cpp",
+]
+
+EXTENSION_DATA_LOADER_SRCS = [
+    "extension/data_loader/file_data_loader.cpp",
+    "extension/data_loader/mmap_data_loader.cpp",
+]
+
+EXTENSION_MODULE_SRCS = [
+    "extension/flat_tensor/flat_tensor_data_map.cpp",
+    "extension/flat_tensor/serialize/flat_tensor_header.cpp",
+    "extension/module/module.cpp",
+]
+
+EXTENSION_RUNNER_UTIL_SRCS = [
+    "extension/runner_util/inputs.cpp",
+    "extension/runner_util/inputs_portable.cpp",
+]
+
+EXTENSION_LLM_RUNNER_SRCS = [
+    "extension/data_loader/file_data_loader.cpp",
+    "extension/data_loader/mmap_data_loader.cpp",
+    "extension/llm/runner/text_decoder_runner.cpp",
+    "extension/llm/runner/text_prefiller.cpp",
+    "extension/llm/sampler/sampler.cpp",
+    "extension/tensor/tensor_ptr.cpp",
+    "extension/tensor/tensor_ptr_maker.cpp",
+]
+
+EXTENSION_TENSOR_SRCS = [
+    "extension/tensor/tensor_ptr.cpp",
+    "extension/tensor/tensor_ptr_maker.cpp",
+]
+
+EXTENSION_THREADPOOL_SRCS = [
+    "extension/threadpool/threadpool.cpp",
+    "extension/threadpool/threadpool_guard.cpp",
+]
+
+EXTENSION_TRAINING_SRCS = [
+    "extension/data_loader/file_data_loader.cpp",
+    "extension/data_loader/mmap_data_loader.cpp",
+    "extension/flat_tensor/flat_tensor_data_map.cpp",
+    "extension/flat_tensor/serialize/flat_tensor_header.cpp",
+    "extension/module/module.cpp",
+    "extension/training/module/training_module.cpp",
+    "extension/training/optimizer/sgd.cpp",
+    "kernels/prim_ops/et_copy_index.cpp",
+    "kernels/prim_ops/et_view.cpp",
+    "kernels/prim_ops/register_prim_ops.cpp",
+]
+
+TRAIN_XOR_SRCS = [
+    "extension/data_loader/file_data_loader.cpp",
+    "extension/data_loader/mmap_data_loader.cpp",
+    "extension/flat_tensor/flat_tensor_data_map.cpp",
+    "extension/flat_tensor/serialize/flat_tensor_header.cpp",
+    "extension/flat_tensor/serialize/serialize.cpp",
+    "extension/module/module.cpp",
+    "extension/tensor/tensor_ptr.cpp",
+    "extension/tensor/tensor_ptr_maker.cpp",
+    "extension/training/examples/XOR/train.cpp",
+    "extension/training/module/training_module.cpp",
+    "extension/training/optimizer/sgd.cpp",
+]
+
+EXECUTOR_RUNNER_SRCS = [
+    "examples/portable/executor_runner/executor_runner.cpp",
+    "extension/data_loader/file_data_loader.cpp",
+    "extension/evalue_util/print_evalue.cpp",
+    "extension/runner_util/inputs.cpp",
+    "extension/runner_util/inputs_portable.cpp",
+    "runtime/executor/test/test_backend_compiler_lib.cpp",
+]
+
+SIZE_TEST_SRCS = [
+    "extension/data_loader/file_data_loader.cpp",
+    "test/size_test.cpp",
+]
+
+MPS_EXECUTOR_RUNNER_SRCS = [  # Repeats every MPS_BACKEND_SRCS entry, then adds devtools, MPS runner, and runner-util sources.
+    "backends/apple/mps/runtime/MPSBackend.mm",
+    "backends/apple/mps/runtime/MPSCompiler.mm",
+    "backends/apple/mps/runtime/MPSDelegateHeader.mm",
+    "backends/apple/mps/runtime/MPSDevice.mm",
+    "backends/apple/mps/runtime/MPSExecutor.mm",
+    "backends/apple/mps/runtime/MPSGraphBuilder.mm",
+    "backends/apple/mps/runtime/MPSStream.mm",
+    "backends/apple/mps/runtime/operations/ActivationOps.mm",
+    "backends/apple/mps/runtime/operations/BinaryOps.mm",
+    "backends/apple/mps/runtime/operations/ClampOps.mm",
+    "backends/apple/mps/runtime/operations/ConstantOps.mm",
+    "backends/apple/mps/runtime/operations/ConvolutionOps.mm",
+    "backends/apple/mps/runtime/operations/IndexingOps.mm",
+    "backends/apple/mps/runtime/operations/LinearAlgebra.mm",
+    "backends/apple/mps/runtime/operations/NormalizationOps.mm",
+    "backends/apple/mps/runtime/operations/OperationUtils.mm",
+    "backends/apple/mps/runtime/operations/PadOps.mm",
+    "backends/apple/mps/runtime/operations/PoolingOps.mm",
+    "backends/apple/mps/runtime/operations/QuantDequant.mm",
+    "backends/apple/mps/runtime/operations/RangeOps.mm",
+    "backends/apple/mps/runtime/operations/ReduceOps.mm",
+    "backends/apple/mps/runtime/operations/ShapeOps.mm",
+    "backends/apple/mps/runtime/operations/UnaryOps.mm",
+    "devtools/bundled_program/bundled_program.cpp",
+    "devtools/etdump/emitter.cpp",
+    "devtools/etdump/etdump_flatcc.cpp",
+    "examples/apple/mps/executor_runner/mps_executor_runner.mm",
+    "extension/data_loader/file_data_loader.cpp",
+    "extension/evalue_util/print_evalue.cpp",
+    "extension/runner_util/inputs.cpp",
+    "extension/runner_util/inputs_portable.cpp",
+]
+
+MPS_BACKEND_SRCS = [  # The .mm sources under backends/apple/mps/runtime; the same entries also lead MPS_EXECUTOR_RUNNER_SRCS.
+    "backends/apple/mps/runtime/MPSBackend.mm",
+    "backends/apple/mps/runtime/MPSCompiler.mm",
+    "backends/apple/mps/runtime/MPSDelegateHeader.mm",
+    "backends/apple/mps/runtime/MPSDevice.mm",
+    "backends/apple/mps/runtime/MPSExecutor.mm",
+    "backends/apple/mps/runtime/MPSGraphBuilder.mm",
+    "backends/apple/mps/runtime/MPSStream.mm",
+    "backends/apple/mps/runtime/operations/ActivationOps.mm",
+    "backends/apple/mps/runtime/operations/BinaryOps.mm",
+    "backends/apple/mps/runtime/operations/ClampOps.mm",
+    "backends/apple/mps/runtime/operations/ConstantOps.mm",
+    "backends/apple/mps/runtime/operations/ConvolutionOps.mm",
+    "backends/apple/mps/runtime/operations/IndexingOps.mm",
+    "backends/apple/mps/runtime/operations/LinearAlgebra.mm",
+    "backends/apple/mps/runtime/operations/NormalizationOps.mm",
+    "backends/apple/mps/runtime/operations/OperationUtils.mm",
+    "backends/apple/mps/runtime/operations/PadOps.mm",
+    "backends/apple/mps/runtime/operations/PoolingOps.mm",
+    "backends/apple/mps/runtime/operations/QuantDequant.mm",
+    "backends/apple/mps/runtime/operations/RangeOps.mm",
+    "backends/apple/mps/runtime/operations/ReduceOps.mm",
+    "backends/apple/mps/runtime/operations/ShapeOps.mm",
+    "backends/apple/mps/runtime/operations/UnaryOps.mm",
+]
+
+MPS_SCHEMA_SRCS = [  # The single .fbs schema file under backends/apple/mps/serialization.
+    "backends/apple/mps/serialization/schema.fbs",
+]
+
+XNN_EXECUTOR_RUNNER_SRCS = [  # Same entries as EXECUTOR_RUNNER_SRCS except test_backend_compiler_lib.cpp is not listed.
+    "examples/portable/executor_runner/executor_runner.cpp",
+    "extension/data_loader/file_data_loader.cpp",
+    "extension/evalue_util/print_evalue.cpp",
+    "extension/runner_util/inputs.cpp",
+    "extension/runner_util/inputs_portable.cpp",
+]
+
+XNNPACK_BACKEND_SRCS = [  # backends/xnnpack/runtime sources (incl. profiling) plus the extension/threadpool sources.
+    "backends/xnnpack/runtime/XNNCompiler.cpp",
+    "backends/xnnpack/runtime/XNNExecutor.cpp",
+    "backends/xnnpack/runtime/XNNHeader.cpp",
+    "backends/xnnpack/runtime/XNNPACKBackend.cpp",
+    "backends/xnnpack/runtime/profiling/XNNProfiler.cpp",
+    "extension/threadpool/threadpool.cpp",
+    "extension/threadpool/threadpool_guard.cpp",
+]
+
+XNNPACK_SCHEMA_SRCS = [  # The runtime_schema.fbs file under backends/xnnpack/serialization.
+    "backends/xnnpack/serialization/runtime_schema.fbs",
+]
+
+VULKAN_SCHEMA_SRCS = [  # The schema.fbs file under backends/vulkan/serialization.
+    "backends/vulkan/serialization/schema.fbs",
+]
+
+CUSTOM_OPS_SRCS = [  # extension/llm/custom_ops sources (incl. spinquant) plus the portable kernels' reduce_util.cpp.
+    "extension/llm/custom_ops/op_fallback.cpp",
+    "extension/llm/custom_ops/op_fast_hadamard_transform.cpp",
+    "extension/llm/custom_ops/op_sdpa.cpp",
+    "extension/llm/custom_ops/op_update_cache.cpp",
+    "extension/llm/custom_ops/spinquant/fast_hadamard_transform.cpp",
+    "kernels/portable/cpu/util/reduce_util.cpp",
+]
+
+LLAMA_RUNNER_SRCS = [  # Llama example runner and tokenizer plus the extension/llm, evalue_util, and tensor sources listed.
+    "examples/models/llama/runner/runner.cpp",
+    "examples/models/llama/tokenizer/llama_tiktoken.cpp",
+    "extension/evalue_util/print_evalue.cpp",
+    "extension/llm/runner/text_decoder_runner.cpp",
+    "extension/llm/runner/text_prefiller.cpp",
+    "extension/llm/sampler/sampler.cpp",
+    "extension/llm/tokenizer/bpe_tokenizer.cpp",
+    "extension/llm/tokenizer/tiktoken.cpp",
+    "extension/tensor/tensor_ptr.cpp",
+    "extension/tensor/tensor_ptr_maker.cpp",
+]
diff --git a/test/run_oss_cpp_tests.sh b/test/run_oss_cpp_tests.sh
index f747100006d..ff2ed048257 100755
--- a/test/run_oss_cpp_tests.sh
+++ b/test/run_oss_cpp_tests.sh
@@ -35,6 +35,7 @@ build_executorch() {
   cmake . \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DEXECUTORCH_USE_CPP_CODE_COVERAGE=ON \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
diff --git a/test/utils/OSSTestConfig.json b/test/utils/OSSTestConfig.json
index 6eff74eec86..70cb2d2e44f 100644
--- a/test/utils/OSSTestConfig.json
+++ b/test/utils/OSSTestConfig.json
@@ -1,4 +1,15 @@
 { "tests": [
+    {
+        "directory": "extension/llm/custom_ops/spinquant/test",
+        "sources": [
+            "fast_hadamard_transform_test.cpp",
+            "fast_hadamard_transform_test_impl.cpp",
+            "op_fast_hadamard_transform_test.cpp"
+        ],
+        "additional_libs": [
+            "custom_ops"
+        ]
+    },
     {
         "directory": "extension/data_loader/test",
         "sources": [