diff --git a/CMakeLists.txt b/CMakeLists.txt index 01ad728c425..2a53e73c57c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ cmake_minimum_required(VERSION 3.19) project(executorch) +include(build/Codegen.cmake) include(build/Utils.cmake) include(CMakeDependentOption) @@ -401,6 +402,7 @@ if(NOT EXECUTORCH_SRCS_FILE) message(STATUS "executorch: Generating source lists") set(EXECUTORCH_SRCS_FILE "${CMAKE_CURRENT_BINARY_DIR}/executorch_srcs.cmake") extract_sources(${EXECUTORCH_SRCS_FILE}) + validate_build_variables() endif() # This file defines the `___srcs` variables used below. diff --git a/build/Codegen.cmake b/build/Codegen.cmake index 9ccb7ec442b..11e1c730aa1 100644 --- a/build/Codegen.cmake +++ b/build/Codegen.cmake @@ -9,7 +9,7 @@ # Selective build. See codegen/tools/gen_oplist.py for how to use these # arguments. -include(${EXECUTORCH_ROOT}/build/Utils.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/Utils.cmake) function(gen_selected_ops) set(arg_names LIB_NAME OPS_SCHEMA_YAML ROOT_OPS INCLUDE_ALL_OPS) @@ -97,7 +97,7 @@ function(generate_bindings_for_kernels) --tags-path=${site-packages-out}/torchgen/packaged/ATen/native/tags.yaml --aten-yaml-path=${site-packages-out}/torchgen/packaged/ATen/native/native_functions.yaml --op-selection-yaml-path=${_oplist_yaml} - ) + ) if(GEN_ADD_EXCEPTION_BOUNDARY) set(_gen_command "${_gen_command}" --add-exception-boundary) endif() @@ -217,3 +217,127 @@ function(merge_yaml) WORKING_DIRECTORY ${EXECUTORCH_ROOT} ) endfunction() + +# Append the file list in the variable named `name` in build/build_variables.bzl +# to the variable named `outputvar` in the caller's scope. 
+function(append_filelist name outputvar)
+  # build_variables.bzl is written so that it is valid Python as well as
+  # Starlark, which lets us evaluate it with the Python interpreter below.
+  set(_build_variables_bzl
+      "${PROJECT_SOURCE_DIR}/shim_et/xplat/executorch/build/build_variables.bzl"
+  )
+  # configure_file adds its input to the list of CMAKE_RERUN dependencies, so
+  # editing build_variables.bzl re-runs the configure step.
+  configure_file(
+    "${_build_variables_bzl}" "${PROJECT_BINARY_DIR}/build_variables.bzl"
+    COPYONLY
+  )
+  # Do not assume the caller has already located Python (same guard as
+  # get_torch_base_path in Utils.cmake).
+  if(NOT PYTHON_EXECUTABLE)
+    resolve_python_executable()
+  endif()
+  # Evaluate the file and print the list called `name` joined with ';', i.e.
+  # directly in CMake list form.
+  execute_process(
+    COMMAND
+      "${PYTHON_EXECUTABLE}" -c
+      "exec(open('${_build_variables_bzl}').read());print(';'.join(${name}))"
+    # Was "${_rootdir}", which is not defined anywhere in this file.
+    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+    RESULT_VARIABLE _retval
+    OUTPUT_VARIABLE _tempvar
+    ERROR_VARIABLE _stderr
+    # Drops the trailing newline from print(), including "\r\n".
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+  if(NOT _retval EQUAL 0)
+    message(
+      FATAL_ERROR
+        "Failed to fetch filelist ${name} from build_variables.bzl with output ${_tempvar} and stderr ${_stderr}"
+    )
+  endif()
+  # Append (rather than overwrite) so a caller can accumulate several file
+  # lists into one variable, then publish the result to the caller's scope.
+  list(APPEND ${outputvar} ${_tempvar})
+  set(${outputvar}
+      "${${outputvar}}"
+      PARENT_SCOPE
+  )
+endfunction()
+
+# Fail the build if the src lists in build_variables.bzl do not match the src
+# lists extracted from Buck and placed into EXECUTORCH_SRCS_FILE. This is
+# intended to be a safety mechanism while we are in the process of removing Buck
+# from the CMake build and replacing it with build_variables.bzl; if you are
+# seeing failures after you have intentionally changed Buck srcs, then simply
+# update build_variables.bzl. If you are seeing failures after changing
+# something about the build system, make sure your changes will work both before
+# and after we finish replacing Buck with build_variables.bzl, which should
+# involve getting these lists to match!
+function(validate_build_variables)
+  # Brings the Buck-derived `_<target>__srcs` variables into this function's
+  # scope so they can be compared below.
+  include("${EXECUTORCH_SRCS_FILE}")
+  # Names of the lists in build_variables.bzl ...
+  set(BUILD_VARIABLES_FILELISTS
+      EXECUTORCH_SRCS
+      EXECUTORCH_CORE_SRCS
+      PORTABLE_KERNELS_SRCS
+      OPTIMIZED_KERNELS_SRCS
+      QUANTIZED_KERNELS_SRCS
+      PROGRAM_SCHEMA_SRCS
+      OPTIMIZED_CPUBLAS_SRCS
+      OPTIMIZED_NATIVE_CPU_OPS_SRCS
+      EXTENSION_DATA_LOADER_SRCS
+      EXTENSION_MODULE_SRCS
+      EXTENSION_RUNNER_UTIL_SRCS
+      EXTENSION_LLM_RUNNER_SRCS
+      EXTENSION_TENSOR_SRCS
+      EXTENSION_THREADPOOL_SRCS
+      EXTENSION_TRAINING_SRCS
+      TRAIN_XOR_SRCS
+      EXECUTOR_RUNNER_SRCS
+      SIZE_TEST_SRCS
+      MPS_EXECUTOR_RUNNER_SRCS
+      MPS_BACKEND_SRCS
+      MPS_SCHEMA_SRCS
+      XNN_EXECUTOR_RUNNER_SRCS
+      XNNPACK_BACKEND_SRCS
+      XNNPACK_SCHEMA_SRCS
+      VULKAN_SCHEMA_SRCS
+      CUSTOM_OPS_SRCS
+      LLAMA_RUNNER_SRCS
+  )
+  # ... and, position by position, the Buck-derived variable each one must
+  # equal. The two lists must be kept in the same order.
+  set(BUILD_VARIABLES_VARNAMES
+      _executorch__srcs
+      _executorch_core__srcs
+      _portable_kernels__srcs
+      _optimized_kernels__srcs
+      _quantized_kernels__srcs
+      _program_schema__srcs
+      _optimized_cpublas__srcs
+      _optimized_native_cpu_ops__srcs
+      _extension_data_loader__srcs
+      _extension_module__srcs
+      _extension_runner_util__srcs
+      _extension_llm_runner__srcs
+      _extension_tensor__srcs
+      _extension_threadpool__srcs
+      _extension_training__srcs
+      _train_xor__srcs
+      _executor_runner__srcs
+      _size_test__srcs
+      _mps_executor_runner__srcs
+      _mps_backend__srcs
+      _mps_schema__srcs
+      _xnn_executor_runner__srcs
+      _xnnpack_backend__srcs
+      _xnnpack_schema__srcs
+      _vulkan_schema__srcs
+      _custom_ops__srcs
+      _llama_runner__srcs
+  )
+  # ZIP_LISTS silently pads the shorter list with empty values, so catch an
+  # uneven edit here instead of failing later with an empty list name.
+  list(LENGTH BUILD_VARIABLES_FILELISTS _num_filelists)
+  list(LENGTH BUILD_VARIABLES_VARNAMES _num_varnames)
+  if(NOT _num_filelists EQUAL _num_varnames)
+    message(
+      FATAL_ERROR
+        "validate_build_variables: BUILD_VARIABLES_FILELISTS has ${_num_filelists} entries but BUILD_VARIABLES_VARNAMES has ${_num_varnames}; they must correspond 1:1."
+    )
+  endif()
+  foreach(filelist_and_varname IN ZIP_LISTS BUILD_VARIABLES_FILELISTS
+                                  BUILD_VARIABLES_VARNAMES
+  )
+    # _custom_ops__srcs is deliberately not validated.
+    # NOTE(review): the reason is not stated here -- confirm and document.
+    if("${filelist_and_varname_1}" STREQUAL "_custom_ops__srcs")
+      continue()
+    endif()
+    append_filelist(
+      ${filelist_and_varname_0}
+      "${filelist_and_varname_1}_from_build_variables"
+    )
+    # Compare the list *values* explicitly. The previous unquoted form relied
+    # on if() implicitly dereferencing bare names, which falls back to
+    # comparing the literal variable name when the variable is undefined.
+    if(NOT "${${filelist_and_varname_1}}" STREQUAL
+       "${${filelist_and_varname_1}_from_build_variables}"
+    )
+      message(
+        FATAL_ERROR
+          "Buck-generated ${filelist_and_varname_1} does not match hardcoded "
+          "${filelist_and_varname_0} in build_variables.bzl. Left: "
+          "${${filelist_and_varname_1}}\n "
+          "Right: ${${filelist_and_varname_1}_from_build_variables}"
+      )
+    endif()
+  endforeach()
+endfunction()
diff --git a/build/Test.cmake b/build/Test.cmake index 31e5aaf4d63..dbe590d610b 100644 --- a/build/Test.cmake +++ b/build/Test.cmake @@ -36,6 +36,7 @@ function(et_cxx_test target_name) cmake_parse_arguments(ET_CXX_TEST "" "" "${multi_arg_names}" ${ARGN}) add_executable(${target_name} ${ET_CXX_TEST_SOURCES} ${EXECUTORCH_ROOT}/runtime/core/exec_aten/testing_util/tensor_util.cpp) + find_package(GTest) # Includes gtest, gmock, executorch by default target_link_libraries( ${target_name} GTest::gtest GTest::gtest_main GTest::gmock executorch diff --git a/build/Utils.cmake b/build/Utils.cmake index 646ef5ff285..6e7459430b7 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -329,29 +329,25 @@ function(resolve_python_executable) endfunction() # find_package(Torch CONFIG REQUIRED) replacement for targets that have a -# header-only Torch dependency. Because find_package sets variables in the -# parent scope, we use a macro to preserve this rather than maintaining our own -# list of those variables. -macro(find_package_torch_headers) - # We cannot simply use CMAKE_FIND_ROOT_PATH_BOTH, because that does not - # propagate into TorchConfig.cmake. - foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) - set(OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} - ${CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}} - ) - set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} BOTH) - endforeach() - find_package_torch() - foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) - set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} - ${OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}} - ) - endforeach() -endmacro() +# header-only Torch dependency. +# +# Unlike find_package(Torch ...), this will only set +# TORCH_INCLUDE_DIRS in the parent scope. 
In particular, it will NOT
+# set any of the following:
+# - TORCH_FOUND
+# - TORCH_LIBRARY
+# - TORCH_CXX_FLAGS
+function(find_package_torch_headers)
+  # find_package is deliberately avoided: deriving the include directories
+  # from the installed torch lets a cross-compiling build reuse the host's
+  # copy, since only the headers are needed.
+  get_torch_base_path(_torch_root)
+  # The two header roots, kept as separate list elements.
+  set(_torch_header_dirs
+      "${_torch_root}/include"
+      "${_torch_root}/include/torch/csrc/api/include"
+  )
+  # Publish the list to the caller; nothing else is exported.
+  set(TORCH_INCLUDE_DIRS
+      "${_torch_header_dirs}"
+      PARENT_SCOPE
+  )
+endfunction()
-# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package -# can find Torch. -function(add_torch_to_cmake_prefix_path) +# Return the base path to the installed Torch Python library in +# outVar. +function(get_torch_base_path outVar) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() endif() @@ -370,6 +366,13 @@ function(add_torch_to_cmake_prefix_path) message("Output:\n${_tmp_torch_path}") message(FATAL_ERROR "Error:\n${_tmp_torch_path_error}") endif() + set(${outVar} ${_tmp_torch_path} PARENT_SCOPE) +endfunction() + +# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package +# can find Torch. 
+function(add_torch_to_cmake_prefix_path) + get_torch_base_path(_tmp_torch_path) list(APPEND CMAKE_PREFIX_PATH "${_tmp_torch_path}") set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH}" diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh index 4119bde4c7e..cb2d47fdeb3 100644 --- a/build/build_android_llm_demo.sh +++ b/build/build_android_llm_demo.sh @@ -44,6 +44,7 @@ build_android_native_library() { -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI="${ANDROID_ABI}" \ -DANDROID_PLATFORM=android-26 \ + -DBUILD_TESTING=OFF \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ -DEXECUTORCH_BUILD_XNNPACK=ON \ @@ -73,6 +74,7 @@ build_android_native_library() { -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \ -DANDROID_ABI="${ANDROID_ABI}" \ -DANDROID_PLATFORM=android-26 \ + -DBUILD_TESTING=OFF \ -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml index 21a8e282929..4b22a09cb5b 100644 --- a/build/cmake_deps.toml +++ b/build/cmake_deps.toml @@ -386,10 +386,7 @@ buck_targets = [ "//extension/llm/custom_ops:custom_ops", ] filters = [ - # Second clause is to pick up fht_neon.c/fht_avx.c from FFHT. TODO: - # remove filters and patch extract_sources.py's Buck query to fetch - # srcs; presumably filters is here to remove .h files. - "(.cpp$)|(fht.*\\.c$)", + ".cpp$", ] excludes = [ "^codegen", diff --git a/extension/android_test/setup.sh b/extension/android_test/setup.sh index 725728b8092..c21d2c09623 100755 --- a/extension/android_test/setup.sh +++ b/extension/android_test/setup.sh @@ -21,6 +21,7 @@ build_native_library() { cmake . 
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI="${ANDROID_ABI}" \ + -DBUILD_TESTING=OFF \ -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ @@ -36,6 +37,7 @@ build_native_library() { cmake extension/android \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \ -DANDROID_ABI="${ANDROID_ABI}" \ + -DBUILD_TESTING=OFF \ -DCMAKE_INSTALL_PREFIX=c"${CMAKE_OUT}" \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_LLAMA_JNI=ON \ diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt index 95f4bc559fa..c3969e6f9bf 100644 --- a/extension/llm/custom_ops/CMakeLists.txt +++ b/extension/llm/custom_ops/CMakeLists.txt @@ -45,6 +45,22 @@ list(APPEND custom_ops_libs cpuinfo) list(APPEND custom_ops_libs cpublas) list(APPEND custom_ops_libs eigen_blas) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv7)$") + list(APPEND _custom_ops__srcs + "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_neon.c" + ) +elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + list(APPEND _custom_ops__srcs + "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_avx.c" + ) +else() + message( + FATAL_ERROR + "Unsupported CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}. 
(If \ +32-bit x86, try using fht_avx.c and send a PR if it works!)" + ) +endif() + list(TRANSFORM _custom_ops__srcs PREPEND "${EXECUTORCH_ROOT}/") if(NOT EXECUTORCH_BUILD_XNNPACK) @@ -121,3 +137,8 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT) LIBRARY DESTINATION executorch/extension/llm/custom_ops ) endif() + +add_subdirectory(spinquant/third-party/FFHT) +if(BUILD_TESTING) + add_subdirectory(spinquant/test) +endif() diff --git a/extension/llm/custom_ops/spinquant/test/CMakeLists.txt b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt new file mode 100644 index 00000000000..c793d2ed975 --- /dev/null +++ b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# @generated by test/utils/generate_gtest_cmakelists.py +# +# This file should be formatted with +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ +# It should also be cmake-lint clean. +# + +cmake_minimum_required(VERSION 3.19) + +set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..) + +include(${EXECUTORCH_ROOT}/build/Test.cmake) + +set(_test_srcs + fast_hadamard_transform_test.cpp fast_hadamard_transform_test_impl.cpp + op_fast_hadamard_transform_test.cpp +) + +et_cxx_test( + extension_llm_custom_ops_spinquant_test SOURCES ${_test_srcs} EXTRA_LIBS + custom_ops dumb_fht +) diff --git a/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt b/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt new file mode 100644 index 00000000000..2e3089be72e --- /dev/null +++ b/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Please keep this file formatted by running: +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ + +add_library(dumb_fht dumb_fht.c) diff --git a/shim_et/xplat/executorch/build/build_variables.bzl b/shim_et/xplat/executorch/build/build_variables.bzl new file mode 100644 index 00000000000..8f89e6b5267 --- /dev/null +++ b/shim_et/xplat/executorch/build/build_variables.bzl @@ -0,0 +1,497 @@ +# WARNING: the contents of this file must BOTH be valid Starlark (for Buck) as well as +# valid Python (for our cmake build). This means that load() directives are not allowed +# (as they are not recognized by Python). If you want to fix this, figure out how to run +# this file from cmake with a proper Starlark interpreter as part of the default OSS +# build process. If you need some nontrivial Starlark features, make a separate bzl +# file. (Remember that bzl files are not exported via ShipIt by default, so you may also +# need to update ExecuTorch's ShipIt config.) + +# This file contains srcs lists that are shared between our Buck and CMake build +# systems. We had three choices for listing src files: +# 1) List them in Buck and use buck query to get them in CMake. This was our setup for a +# long time; the problem is that OSS users would prefer not to have to deal with Buck at +# all. +# 2) List them in both Buck targets.bzl files and CMake's CMakeLists.txt files. This is +# unnecessary duplication, and people will invariably forget to update one or the other. +# 3) List them somewhere CMake and Buck can both get at them; that's this file. Buck +# files can load() it, and our CMake build evaluates it with Python. (See +# append_filelist in build/Codegen.cmake.) 
+# +# Inconveniently, the Buck target layout is much more granular than the CMake library +# layout, leading to several complications: +# 1) Single-file Buck targets will just list the one src file they contain. Nothing to +# share with CMake in that case, and that src will be in a list in this file that does +# not map directly to that particular Buck target. +# 2) Multi-file Buck targets should have a list below that corresponds exactly to their +# `srcs`. There should then be simple Python code that combines those lists into lists +# that map 1:1 to the CMake library layout. + +EXECUTORCH_SRCS = [ + "kernels/prim_ops/et_copy_index.cpp", + "kernels/prim_ops/et_view.cpp", + "kernels/prim_ops/register_prim_ops.cpp", +] + +EXECUTORCH_CORE_SRCS = [ + "runtime/backend/interface.cpp", + "runtime/core/evalue.cpp", + "runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp", + "runtime/core/exec_aten/util/tensor_util_portable.cpp", + "runtime/core/portable_type/tensor_impl.cpp", + "runtime/core/tag.cpp", + "runtime/core/tensor_layout.cpp", + "runtime/executor/method.cpp", + "runtime/executor/method_meta.cpp", + "runtime/executor/program.cpp", + "runtime/executor/tensor_parser_exec_aten.cpp", + "runtime/executor/tensor_parser_portable.cpp", + "runtime/kernel/operator_registry.cpp", + "runtime/platform/abort.cpp", + "runtime/platform/default/posix.cpp", + "runtime/platform/log.cpp", + "runtime/platform/profiler.cpp", + "runtime/platform/runtime.cpp", + "schema/extended_header.cpp", +] + +PORTABLE_KERNELS_SRCS = [ + "kernels/portable/cpu/op__empty_dim_order.cpp", + "kernels/portable/cpu/op__to_dim_order_copy.cpp", + "kernels/portable/cpu/op_abs.cpp", + "kernels/portable/cpu/op_acos.cpp", + "kernels/portable/cpu/op_acosh.cpp", + "kernels/portable/cpu/op_add.cpp", + "kernels/portable/cpu/op_addmm.cpp", + "kernels/portable/cpu/op_alias_copy.cpp", + "kernels/portable/cpu/op_allclose.cpp", + "kernels/portable/cpu/op_amax.cpp", + "kernels/portable/cpu/op_amin.cpp", + 
"kernels/portable/cpu/op_any.cpp", + "kernels/portable/cpu/op_arange.cpp", + "kernels/portable/cpu/op_argmax.cpp", + "kernels/portable/cpu/op_argmin.cpp", + "kernels/portable/cpu/op_as_strided_copy.cpp", + "kernels/portable/cpu/op_asin.cpp", + "kernels/portable/cpu/op_asinh.cpp", + "kernels/portable/cpu/op_atan.cpp", + "kernels/portable/cpu/op_atan2.cpp", + "kernels/portable/cpu/op_atanh.cpp", + "kernels/portable/cpu/op_avg_pool2d.cpp", + "kernels/portable/cpu/op_bitwise_and.cpp", + "kernels/portable/cpu/op_bitwise_not.cpp", + "kernels/portable/cpu/op_bitwise_or.cpp", + "kernels/portable/cpu/op_bitwise_xor.cpp", + "kernels/portable/cpu/op_bmm.cpp", + "kernels/portable/cpu/op_cat.cpp", + "kernels/portable/cpu/op_cdist_forward.cpp", + "kernels/portable/cpu/op_ceil.cpp", + "kernels/portable/cpu/op_clamp.cpp", + "kernels/portable/cpu/op_clone.cpp", + "kernels/portable/cpu/op_constant_pad_nd.cpp", + "kernels/portable/cpu/op_convolution.cpp", + "kernels/portable/cpu/op_convolution_backward.cpp", + "kernels/portable/cpu/op_copy.cpp", + "kernels/portable/cpu/op_cos.cpp", + "kernels/portable/cpu/op_cosh.cpp", + "kernels/portable/cpu/op_cumsum.cpp", + "kernels/portable/cpu/op_detach_copy.cpp", + "kernels/portable/cpu/op_diagonal_copy.cpp", + "kernels/portable/cpu/op_div.cpp", + "kernels/portable/cpu/op_embedding.cpp", + "kernels/portable/cpu/op_empty.cpp", + "kernels/portable/cpu/op_eq.cpp", + "kernels/portable/cpu/op_erf.cpp", + "kernels/portable/cpu/op_exp.cpp", + "kernels/portable/cpu/op_expand_copy.cpp", + "kernels/portable/cpu/op_expm1.cpp", + "kernels/portable/cpu/op_fill.cpp", + "kernels/portable/cpu/op_flip.cpp", + "kernels/portable/cpu/op_floor.cpp", + "kernels/portable/cpu/op_floor_divide.cpp", + "kernels/portable/cpu/op_fmod.cpp", + "kernels/portable/cpu/op_full.cpp", + "kernels/portable/cpu/op_full_like.cpp", + "kernels/portable/cpu/op_gather.cpp", + "kernels/portable/cpu/op_ge.cpp", + "kernels/portable/cpu/op_gelu.cpp", + "kernels/portable/cpu/op_glu.cpp", + 
"kernels/portable/cpu/op_gt.cpp", + "kernels/portable/cpu/op_hardtanh.cpp", + "kernels/portable/cpu/op_index.cpp", + "kernels/portable/cpu/op_index_put.cpp", + "kernels/portable/cpu/op_index_select.cpp", + "kernels/portable/cpu/op_isinf.cpp", + "kernels/portable/cpu/op_isnan.cpp", + "kernels/portable/cpu/op_le.cpp", + "kernels/portable/cpu/op_leaky_relu.cpp", + "kernels/portable/cpu/op_lift_fresh_copy.cpp", + "kernels/portable/cpu/op_linear_scratch_example.cpp", + "kernels/portable/cpu/op_log.cpp", + "kernels/portable/cpu/op_log10.cpp", + "kernels/portable/cpu/op_log1p.cpp", + "kernels/portable/cpu/op_log2.cpp", + "kernels/portable/cpu/op_log_softmax.cpp", + "kernels/portable/cpu/op_logical_and.cpp", + "kernels/portable/cpu/op_logical_not.cpp", + "kernels/portable/cpu/op_logical_or.cpp", + "kernels/portable/cpu/op_logical_xor.cpp", + "kernels/portable/cpu/op_logit.cpp", + "kernels/portable/cpu/op_lt.cpp", + "kernels/portable/cpu/op_masked_fill.cpp", + "kernels/portable/cpu/op_masked_scatter.cpp", + "kernels/portable/cpu/op_masked_select.cpp", + "kernels/portable/cpu/op_max.cpp", + "kernels/portable/cpu/op_max_pool2d_with_indices.cpp", + "kernels/portable/cpu/op_maximum.cpp", + "kernels/portable/cpu/op_mean.cpp", + "kernels/portable/cpu/op_min.cpp", + "kernels/portable/cpu/op_minimum.cpp", + "kernels/portable/cpu/op_mm.cpp", + "kernels/portable/cpu/op_mul.cpp", + "kernels/portable/cpu/op_narrow_copy.cpp", + "kernels/portable/cpu/op_native_batch_norm.cpp", + "kernels/portable/cpu/op_native_group_norm.cpp", + "kernels/portable/cpu/op_native_layer_norm.cpp", + "kernels/portable/cpu/op_ne.cpp", + "kernels/portable/cpu/op_neg.cpp", + "kernels/portable/cpu/op_nonzero.cpp", + "kernels/portable/cpu/op_ones.cpp", + "kernels/portable/cpu/op_pdist_forward.cpp", + "kernels/portable/cpu/op_permute_copy.cpp", + "kernels/portable/cpu/op_pixel_shuffle.cpp", + "kernels/portable/cpu/op_pixel_unshuffle.cpp", + "kernels/portable/cpu/op_pow.cpp", + "kernels/portable/cpu/op_prod.cpp", + 
"kernels/portable/cpu/op_reciprocal.cpp", + "kernels/portable/cpu/op_reflection_pad1d.cpp", + "kernels/portable/cpu/op_reflection_pad2d.cpp", + "kernels/portable/cpu/op_reflection_pad3d.cpp", + "kernels/portable/cpu/op_relu.cpp", + "kernels/portable/cpu/op_remainder.cpp", + "kernels/portable/cpu/op_repeat.cpp", + "kernels/portable/cpu/op_repeat_interleave.cpp", + "kernels/portable/cpu/op_replication_pad1d.cpp", + "kernels/portable/cpu/op_replication_pad2d.cpp", + "kernels/portable/cpu/op_replication_pad3d.cpp", + "kernels/portable/cpu/op_roll.cpp", + "kernels/portable/cpu/op_round.cpp", + "kernels/portable/cpu/op_rsqrt.cpp", + "kernels/portable/cpu/op_rsub.cpp", + "kernels/portable/cpu/op_scalar_tensor.cpp", + "kernels/portable/cpu/op_scatter.cpp", + "kernels/portable/cpu/op_scatter_add.cpp", + "kernels/portable/cpu/op_select_copy.cpp", + "kernels/portable/cpu/op_select_scatter.cpp", + "kernels/portable/cpu/op_sigmoid.cpp", + "kernels/portable/cpu/op_sign.cpp", + "kernels/portable/cpu/op_sin.cpp", + "kernels/portable/cpu/op_sinh.cpp", + "kernels/portable/cpu/op_slice_copy.cpp", + "kernels/portable/cpu/op_slice_scatter.cpp", + "kernels/portable/cpu/op_softmax.cpp", + "kernels/portable/cpu/op_split_copy.cpp", + "kernels/portable/cpu/op_split_with_sizes_copy.cpp", + "kernels/portable/cpu/op_sqrt.cpp", + "kernels/portable/cpu/op_squeeze_copy.cpp", + "kernels/portable/cpu/op_stack.cpp", + "kernels/portable/cpu/op_sub.cpp", + "kernels/portable/cpu/op_sum.cpp", + "kernels/portable/cpu/op_t_copy.cpp", + "kernels/portable/cpu/op_tan.cpp", + "kernels/portable/cpu/op_tanh.cpp", + "kernels/portable/cpu/op_to_copy.cpp", + "kernels/portable/cpu/op_topk.cpp", + "kernels/portable/cpu/op_transpose_copy.cpp", + "kernels/portable/cpu/op_tril.cpp", + "kernels/portable/cpu/op_trunc.cpp", + "kernels/portable/cpu/op_unbind_copy.cpp", + "kernels/portable/cpu/op_unsqueeze_copy.cpp", + "kernels/portable/cpu/op_upsample_bilinear2d.cpp", + "kernels/portable/cpu/op_upsample_nearest2d.cpp", + 
"kernels/portable/cpu/op_var.cpp", + "kernels/portable/cpu/op_view_copy.cpp", + "kernels/portable/cpu/op_where.cpp", + "kernels/portable/cpu/op_zeros.cpp", + "kernels/portable/cpu/pattern/unary_ufunc_realh.cpp", + "kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp", + "kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp", + "kernels/portable/cpu/util/activation_ops_util.cpp", + "kernels/portable/cpu/util/advanced_index_util.cpp", + "kernels/portable/cpu/util/broadcast_util.cpp", + "kernels/portable/cpu/util/copy_ops_util.cpp", + "kernels/portable/cpu/util/distance_util.cpp", + "kernels/portable/cpu/util/dtype_util.cpp", + "kernels/portable/cpu/util/index_util.cpp", + "kernels/portable/cpu/util/kernel_ops_util.cpp", + "kernels/portable/cpu/util/matmul_ops_util.cpp", + "kernels/portable/cpu/util/normalization_ops_util.cpp", + "kernels/portable/cpu/util/padding_util.cpp", + "kernels/portable/cpu/util/reduce_util.cpp", + "kernels/portable/cpu/util/repeat_util.cpp", + "kernels/portable/cpu/util/select_copy_util.cpp", + "kernels/portable/cpu/util/slice_util.cpp", + "kernels/portable/cpu/util/upsample_util.cpp", +] + +OPTIMIZED_KERNELS_SRCS = [ + "extension/parallel/thread_parallel.cpp", + "kernels/optimized/blas/BlasKernel.cpp", + "kernels/optimized/blas/CPUBlas.cpp", + "kernels/optimized/cpu/op_add.cpp", + "kernels/optimized/cpu/op_bmm.cpp", + "kernels/optimized/cpu/op_div.cpp", + "kernels/optimized/cpu/op_exp.cpp", + "kernels/optimized/cpu/op_fft_r2c.cpp", + "kernels/optimized/cpu/op_gelu.cpp", + "kernels/optimized/cpu/op_le.cpp", + "kernels/optimized/cpu/op_linear.cpp", + "kernels/optimized/cpu/op_log_softmax.cpp", + "kernels/optimized/cpu/op_mm.cpp", + "kernels/optimized/cpu/op_mul.cpp", + "kernels/optimized/cpu/op_native_layer_norm.cpp", + "kernels/optimized/cpu/op_neg.cpp", + "kernels/optimized/cpu/op_sigmoid.cpp", + "kernels/optimized/cpu/op_sub.cpp", +] + +QUANTIZED_KERNELS_SRCS = [ + "kernels/quantized/cpu/embeddingxb.cpp", + 
"kernels/quantized/cpu/op_add.cpp", + "kernels/quantized/cpu/op_choose_qparams.cpp", + "kernels/quantized/cpu/op_dequantize.cpp", + "kernels/quantized/cpu/op_embedding.cpp", + "kernels/quantized/cpu/op_embedding2b.cpp", + "kernels/quantized/cpu/op_embedding4b.cpp", + "kernels/quantized/cpu/op_mixed_linear.cpp", + "kernels/quantized/cpu/op_mixed_mm.cpp", + "kernels/quantized/cpu/op_quantize.cpp", +] + +PROGRAM_SCHEMA_SRCS = [ + "schema/program.fbs", + "schema/scalar_type.fbs", +] + +OPTIMIZED_CPUBLAS_SRCS = [ + "extension/parallel/thread_parallel.cpp", + "extension/threadpool/threadpool.cpp", + "extension/threadpool/threadpool_guard.cpp", + "kernels/optimized/blas/BlasKernel.cpp", + "kernels/optimized/blas/CPUBlas.cpp", +] + +OPTIMIZED_NATIVE_CPU_OPS_SRCS = [ + "codegen/templates/RegisterCodegenUnboxedKernels.cpp", + "codegen/templates/RegisterDispatchKeyCustomOps.cpp", + "codegen/templates/RegisterKernels.cpp", + "codegen/templates/RegisterSchema.cpp", + "extension/parallel/thread_parallel.cpp", + "extension/threadpool/threadpool.cpp", + "extension/threadpool/threadpool_guard.cpp", + "kernels/optimized/blas/BlasKernel.cpp", + "kernels/optimized/blas/CPUBlas.cpp", + "kernels/optimized/cpu/op_add.cpp", + "kernels/optimized/cpu/op_bmm.cpp", + "kernels/optimized/cpu/op_div.cpp", + "kernels/optimized/cpu/op_exp.cpp", + "kernels/optimized/cpu/op_fft_r2c.cpp", + "kernels/optimized/cpu/op_gelu.cpp", + "kernels/optimized/cpu/op_le.cpp", + "kernels/optimized/cpu/op_linear.cpp", + "kernels/optimized/cpu/op_log_softmax.cpp", + "kernels/optimized/cpu/op_mm.cpp", + "kernels/optimized/cpu/op_mul.cpp", + "kernels/optimized/cpu/op_native_layer_norm.cpp", + "kernels/optimized/cpu/op_neg.cpp", + "kernels/optimized/cpu/op_sigmoid.cpp", + "kernels/optimized/cpu/op_sub.cpp", +] + +EXTENSION_DATA_LOADER_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "extension/data_loader/mmap_data_loader.cpp", +] + +EXTENSION_MODULE_SRCS = [ + 
"extension/flat_tensor/flat_tensor_data_map.cpp", + "extension/flat_tensor/serialize/flat_tensor_header.cpp", + "extension/module/module.cpp", +] + +EXTENSION_RUNNER_UTIL_SRCS = [ + "extension/runner_util/inputs.cpp", + "extension/runner_util/inputs_portable.cpp", +] + +EXTENSION_LLM_RUNNER_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "extension/data_loader/mmap_data_loader.cpp", + "extension/llm/runner/text_decoder_runner.cpp", + "extension/llm/runner/text_prefiller.cpp", + "extension/llm/sampler/sampler.cpp", + "extension/tensor/tensor_ptr.cpp", + "extension/tensor/tensor_ptr_maker.cpp", +] + +EXTENSION_TENSOR_SRCS = [ + "extension/tensor/tensor_ptr.cpp", + "extension/tensor/tensor_ptr_maker.cpp", +] + +EXTENSION_THREADPOOL_SRCS = [ + "extension/threadpool/threadpool.cpp", + "extension/threadpool/threadpool_guard.cpp", +] + +EXTENSION_TRAINING_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "extension/data_loader/mmap_data_loader.cpp", + "extension/flat_tensor/flat_tensor_data_map.cpp", + "extension/flat_tensor/serialize/flat_tensor_header.cpp", + "extension/module/module.cpp", + "extension/training/module/training_module.cpp", + "extension/training/optimizer/sgd.cpp", + "kernels/prim_ops/et_copy_index.cpp", + "kernels/prim_ops/et_view.cpp", + "kernels/prim_ops/register_prim_ops.cpp", +] + +TRAIN_XOR_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "extension/data_loader/mmap_data_loader.cpp", + "extension/flat_tensor/flat_tensor_data_map.cpp", + "extension/flat_tensor/serialize/flat_tensor_header.cpp", + "extension/flat_tensor/serialize/serialize.cpp", + "extension/module/module.cpp", + "extension/tensor/tensor_ptr.cpp", + "extension/tensor/tensor_ptr_maker.cpp", + "extension/training/examples/XOR/train.cpp", + "extension/training/module/training_module.cpp", + "extension/training/optimizer/sgd.cpp", +] + +EXECUTOR_RUNNER_SRCS = [ + "examples/portable/executor_runner/executor_runner.cpp", + 
"extension/data_loader/file_data_loader.cpp", + "extension/evalue_util/print_evalue.cpp", + "extension/runner_util/inputs.cpp", + "extension/runner_util/inputs_portable.cpp", + "runtime/executor/test/test_backend_compiler_lib.cpp", +] + +SIZE_TEST_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "test/size_test.cpp", +] + +MPS_EXECUTOR_RUNNER_SRCS = [ + "backends/apple/mps/runtime/MPSBackend.mm", + "backends/apple/mps/runtime/MPSCompiler.mm", + "backends/apple/mps/runtime/MPSDelegateHeader.mm", + "backends/apple/mps/runtime/MPSDevice.mm", + "backends/apple/mps/runtime/MPSExecutor.mm", + "backends/apple/mps/runtime/MPSGraphBuilder.mm", + "backends/apple/mps/runtime/MPSStream.mm", + "backends/apple/mps/runtime/operations/ActivationOps.mm", + "backends/apple/mps/runtime/operations/BinaryOps.mm", + "backends/apple/mps/runtime/operations/ClampOps.mm", + "backends/apple/mps/runtime/operations/ConstantOps.mm", + "backends/apple/mps/runtime/operations/ConvolutionOps.mm", + "backends/apple/mps/runtime/operations/IndexingOps.mm", + "backends/apple/mps/runtime/operations/LinearAlgebra.mm", + "backends/apple/mps/runtime/operations/NormalizationOps.mm", + "backends/apple/mps/runtime/operations/OperationUtils.mm", + "backends/apple/mps/runtime/operations/PadOps.mm", + "backends/apple/mps/runtime/operations/PoolingOps.mm", + "backends/apple/mps/runtime/operations/QuantDequant.mm", + "backends/apple/mps/runtime/operations/RangeOps.mm", + "backends/apple/mps/runtime/operations/ReduceOps.mm", + "backends/apple/mps/runtime/operations/ShapeOps.mm", + "backends/apple/mps/runtime/operations/UnaryOps.mm", + "devtools/bundled_program/bundled_program.cpp", + "devtools/etdump/emitter.cpp", + "devtools/etdump/etdump_flatcc.cpp", + "examples/apple/mps/executor_runner/mps_executor_runner.mm", + "extension/data_loader/file_data_loader.cpp", + "extension/evalue_util/print_evalue.cpp", + "extension/runner_util/inputs.cpp", + "extension/runner_util/inputs_portable.cpp", +] + 
+MPS_BACKEND_SRCS = [ + "backends/apple/mps/runtime/MPSBackend.mm", + "backends/apple/mps/runtime/MPSCompiler.mm", + "backends/apple/mps/runtime/MPSDelegateHeader.mm", + "backends/apple/mps/runtime/MPSDevice.mm", + "backends/apple/mps/runtime/MPSExecutor.mm", + "backends/apple/mps/runtime/MPSGraphBuilder.mm", + "backends/apple/mps/runtime/MPSStream.mm", + "backends/apple/mps/runtime/operations/ActivationOps.mm", + "backends/apple/mps/runtime/operations/BinaryOps.mm", + "backends/apple/mps/runtime/operations/ClampOps.mm", + "backends/apple/mps/runtime/operations/ConstantOps.mm", + "backends/apple/mps/runtime/operations/ConvolutionOps.mm", + "backends/apple/mps/runtime/operations/IndexingOps.mm", + "backends/apple/mps/runtime/operations/LinearAlgebra.mm", + "backends/apple/mps/runtime/operations/NormalizationOps.mm", + "backends/apple/mps/runtime/operations/OperationUtils.mm", + "backends/apple/mps/runtime/operations/PadOps.mm", + "backends/apple/mps/runtime/operations/PoolingOps.mm", + "backends/apple/mps/runtime/operations/QuantDequant.mm", + "backends/apple/mps/runtime/operations/RangeOps.mm", + "backends/apple/mps/runtime/operations/ReduceOps.mm", + "backends/apple/mps/runtime/operations/ShapeOps.mm", + "backends/apple/mps/runtime/operations/UnaryOps.mm", +] + +MPS_SCHEMA_SRCS = [ + "backends/apple/mps/serialization/schema.fbs", +] + +XNN_EXECUTOR_RUNNER_SRCS = [ + "examples/portable/executor_runner/executor_runner.cpp", + "extension/data_loader/file_data_loader.cpp", + "extension/evalue_util/print_evalue.cpp", + "extension/runner_util/inputs.cpp", + "extension/runner_util/inputs_portable.cpp", +] + +XNNPACK_BACKEND_SRCS = [ + "backends/xnnpack/runtime/XNNCompiler.cpp", + "backends/xnnpack/runtime/XNNExecutor.cpp", + "backends/xnnpack/runtime/XNNHeader.cpp", + "backends/xnnpack/runtime/XNNPACKBackend.cpp", + "backends/xnnpack/runtime/profiling/XNNProfiler.cpp", + "extension/threadpool/threadpool.cpp", + "extension/threadpool/threadpool_guard.cpp", +] + 
+XNNPACK_SCHEMA_SRCS = [ + "backends/xnnpack/serialization/runtime_schema.fbs", +] + +VULKAN_SCHEMA_SRCS = [ + "backends/vulkan/serialization/schema.fbs", +] + +CUSTOM_OPS_SRCS = [ + "extension/llm/custom_ops/op_fallback.cpp", + "extension/llm/custom_ops/op_fast_hadamard_transform.cpp", + "extension/llm/custom_ops/op_sdpa.cpp", + "extension/llm/custom_ops/op_update_cache.cpp", + "extension/llm/custom_ops/spinquant/fast_hadamard_transform.cpp", + "kernels/portable/cpu/util/reduce_util.cpp", +] + +LLAMA_RUNNER_SRCS = [ + "examples/models/llama/runner/runner.cpp", + "examples/models/llama/tokenizer/llama_tiktoken.cpp", + "extension/evalue_util/print_evalue.cpp", + "extension/llm/runner/text_decoder_runner.cpp", + "extension/llm/runner/text_prefiller.cpp", + "extension/llm/sampler/sampler.cpp", + "extension/llm/tokenizer/bpe_tokenizer.cpp", + "extension/llm/tokenizer/tiktoken.cpp", + "extension/tensor/tensor_ptr.cpp", + "extension/tensor/tensor_ptr_maker.cpp", +] diff --git a/test/run_oss_cpp_tests.sh b/test/run_oss_cpp_tests.sh index f747100006d..ff2ed048257 100755 --- a/test/run_oss_cpp_tests.sh +++ b/test/run_oss_cpp_tests.sh @@ -35,6 +35,7 @@ build_executorch() { cmake . \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DEXECUTORCH_USE_CPP_CODE_COVERAGE=ON \ + -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ diff --git a/test/utils/OSSTestConfig.json b/test/utils/OSSTestConfig.json index 6eff74eec86..70cb2d2e44f 100644 --- a/test/utils/OSSTestConfig.json +++ b/test/utils/OSSTestConfig.json @@ -1,4 +1,15 @@ { "tests": [ + { + "directory": "extension/llm/custom_ops/spinquant/test", + "sources": [ + "fast_hadamard_transform_test.cpp", + "fast_hadamard_transform_test_impl.cpp", + "op_fast_hadamard_transform_test.cpp" + ], + "additional_libs": [ + "custom_ops" + ] + }, { "directory": "extension/data_loader/test", "sources": [