From 07adf1a07ae07c2c4ea6c5da78b4ec09bc50c8bf Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 7 Feb 2025 17:36:57 -0800 Subject: [PATCH 01/10] Update [ghstack-poisoned] --- build/Codegen.cmake | 21 ++ build/build_variables.bzl | 495 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 516 insertions(+) create mode 100644 build/build_variables.bzl diff --git a/build/Codegen.cmake b/build/Codegen.cmake index 435b3d24802..0d93590be99 100644 --- a/build/Codegen.cmake +++ b/build/Codegen.cmake @@ -213,3 +213,24 @@ function(merge_yaml) WORKING_DIRECTORY ${EXECUTORCH_ROOT} ) endfunction() + +function(append_filelist name outputvar) + set(_rootdir "${EXECUTORCH_ROOT}/") + # configure_file adds its input to the list of CMAKE_RERUN dependencies + configure_file( + ${PROJECT_SOURCE_DIR}/build/build_variables.bzl + ${PROJECT_BINARY_DIR}/build_variables.bzl + COPYONLY) + execute_process( + COMMAND "${Python_EXECUTABLE}" -c + "exec(open('${PROJECT_SOURCE_DIR}/build/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))" + WORKING_DIRECTORY "${_rootdir}" + RESULT_VARIABLE _retval + OUTPUT_VARIABLE _tempvar) + if(NOT _retval EQUAL 0) + message(FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl") + endif() + string(REPLACE "\n" "" _tempvar "${_tempvar}") + list(APPEND ${outputvar} ${_tempvar}) + set(${outputvar} "${${outputvar}}" PARENT_SCOPE) +endfunction() diff --git a/build/build_variables.bzl b/build/build_variables.bzl new file mode 100644 index 00000000000..75f1792f6fa --- /dev/null +++ b/build/build_variables.bzl @@ -0,0 +1,495 @@ +# WARNING: the contents of this file must BOTH be valid Starlark (for Buck) as well as +# valid Python (for our cmake build). This means that load() directives are not allowed +# (as they are not recognized by Python). If you want to fix this, figure out how run +# this file from cmake with a proper Starlark interpreter as part of the default OSS +# build process. If you need some nontrivial Starlark features, make a separate bzl +# file. (Remember that bzl files are not exported via ShipIt by default, so you may also +# need to update ExecuTorch's ShipIt config.) + +# _executorch__srcs +EXECUTORCH_SRCS = [ + "kernels/prim_ops/et_copy_index.cpp", + "kernels/prim_ops/et_view.cpp", + "kernels/prim_ops/register_prim_ops.cpp", +] + +# _executorch_core__srcs +EXECUTORCH_CORE_SRCS = [ + "runtime/backend/interface.cpp", + "runtime/core/evalue.cpp", + "runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp", + "runtime/core/exec_aten/util/tensor_util_portable.cpp", + "runtime/core/portable_type/tensor_impl.cpp", + "runtime/core/tag.cpp", + "runtime/executor/method.cpp", + "runtime/executor/method_meta.cpp", + "runtime/executor/program.cpp", + "runtime/executor/tensor_parser_exec_aten.cpp", + "runtime/executor/tensor_parser_portable.cpp", + "runtime/kernel/operator_registry.cpp", + "runtime/platform/abort.cpp", + "runtime/platform/default/posix.cpp", + "runtime/platform/log.cpp", + "runtime/platform/profiler.cpp", + "runtime/platform/runtime.cpp", + "schema/extended_header.cpp", +] + +# _portable_kernels__srcs +PORTABLE_KERNELS_SRCS = [ + "kernels/portable/cpu/op__empty_dim_order.cpp", + "kernels/portable/cpu/op__to_dim_order_copy.cpp", + "kernels/portable/cpu/op_abs.cpp", + "kernels/portable/cpu/op_acos.cpp", + "kernels/portable/cpu/op_acosh.cpp", + "kernels/portable/cpu/op_add.cpp", + "kernels/portable/cpu/op_addmm.cpp", + "kernels/portable/cpu/op_alias_copy.cpp", + "kernels/portable/cpu/op_allclose.cpp", + "kernels/portable/cpu/op_amax.cpp", + "kernels/portable/cpu/op_amin.cpp", + "kernels/portable/cpu/op_any.cpp", + "kernels/portable/cpu/op_arange.cpp", + "kernels/portable/cpu/op_argmax.cpp", + "kernels/portable/cpu/op_argmin.cpp", + "kernels/portable/cpu/op_as_strided_copy.cpp", + "kernels/portable/cpu/op_asin.cpp", + "kernels/portable/cpu/op_asinh.cpp", + "kernels/portable/cpu/op_atan.cpp", + "kernels/portable/cpu/op_atan2.cpp", + "kernels/portable/cpu/op_atanh.cpp", + "kernels/portable/cpu/op_avg_pool2d.cpp", + "kernels/portable/cpu/op_bitwise_and.cpp", + "kernels/portable/cpu/op_bitwise_not.cpp", + "kernels/portable/cpu/op_bitwise_or.cpp", + "kernels/portable/cpu/op_bitwise_xor.cpp", + "kernels/portable/cpu/op_bmm.cpp", + "kernels/portable/cpu/op_cat.cpp", + "kernels/portable/cpu/op_cdist_forward.cpp", + "kernels/portable/cpu/op_ceil.cpp", + "kernels/portable/cpu/op_clamp.cpp", + "kernels/portable/cpu/op_clone.cpp", + "kernels/portable/cpu/op_constant_pad_nd.cpp", + "kernels/portable/cpu/op_convolution.cpp", + "kernels/portable/cpu/op_convolution_backward.cpp", + "kernels/portable/cpu/op_copy.cpp", + "kernels/portable/cpu/op_cos.cpp", + "kernels/portable/cpu/op_cosh.cpp", + "kernels/portable/cpu/op_cumsum.cpp", + "kernels/portable/cpu/op_detach_copy.cpp", + "kernels/portable/cpu/op_diagonal_copy.cpp", + "kernels/portable/cpu/op_div.cpp", + "kernels/portable/cpu/op_embedding.cpp", + "kernels/portable/cpu/op_empty.cpp", + "kernels/portable/cpu/op_eq.cpp", + "kernels/portable/cpu/op_erf.cpp", + "kernels/portable/cpu/op_exp.cpp", + "kernels/portable/cpu/op_expand_copy.cpp", + "kernels/portable/cpu/op_expm1.cpp", + "kernels/portable/cpu/op_fill.cpp", + "kernels/portable/cpu/op_flip.cpp", + "kernels/portable/cpu/op_floor.cpp", + "kernels/portable/cpu/op_floor_divide.cpp", + "kernels/portable/cpu/op_fmod.cpp", + "kernels/portable/cpu/op_full.cpp", + "kernels/portable/cpu/op_full_like.cpp", + "kernels/portable/cpu/op_gather.cpp", + "kernels/portable/cpu/op_ge.cpp", + "kernels/portable/cpu/op_gelu.cpp", + "kernels/portable/cpu/op_glu.cpp", + "kernels/portable/cpu/op_gt.cpp", + "kernels/portable/cpu/op_hardtanh.cpp", + "kernels/portable/cpu/op_index.cpp", + "kernels/portable/cpu/op_index_put.cpp", + "kernels/portable/cpu/op_index_select.cpp", + "kernels/portable/cpu/op_isinf.cpp", + "kernels/portable/cpu/op_isnan.cpp", + "kernels/portable/cpu/op_le.cpp", + "kernels/portable/cpu/op_leaky_relu.cpp", + "kernels/portable/cpu/op_lift_fresh_copy.cpp", + "kernels/portable/cpu/op_linear_scratch_example.cpp", + "kernels/portable/cpu/op_log.cpp", + "kernels/portable/cpu/op_log10.cpp", + "kernels/portable/cpu/op_log1p.cpp", + "kernels/portable/cpu/op_log2.cpp", + "kernels/portable/cpu/op_log_softmax.cpp", + "kernels/portable/cpu/op_logical_and.cpp", + "kernels/portable/cpu/op_logical_not.cpp", + "kernels/portable/cpu/op_logical_or.cpp", + "kernels/portable/cpu/op_logical_xor.cpp", + "kernels/portable/cpu/op_logit.cpp", + "kernels/portable/cpu/op_lt.cpp", + "kernels/portable/cpu/op_masked_fill.cpp", + "kernels/portable/cpu/op_masked_scatter.cpp", + "kernels/portable/cpu/op_masked_select.cpp", + "kernels/portable/cpu/op_max.cpp", + "kernels/portable/cpu/op_max_pool2d_with_indices.cpp", + "kernels/portable/cpu/op_maximum.cpp", + "kernels/portable/cpu/op_mean.cpp", + "kernels/portable/cpu/op_min.cpp", + "kernels/portable/cpu/op_minimum.cpp", + "kernels/portable/cpu/op_mm.cpp", + "kernels/portable/cpu/op_mul.cpp", + "kernels/portable/cpu/op_narrow_copy.cpp", + "kernels/portable/cpu/op_native_batch_norm.cpp", + "kernels/portable/cpu/op_native_group_norm.cpp", + "kernels/portable/cpu/op_native_layer_norm.cpp", + "kernels/portable/cpu/op_ne.cpp", + "kernels/portable/cpu/op_neg.cpp", + "kernels/portable/cpu/op_nonzero.cpp", + "kernels/portable/cpu/op_ones.cpp", + "kernels/portable/cpu/op_pdist_forward.cpp", + "kernels/portable/cpu/op_permute_copy.cpp", + "kernels/portable/cpu/op_pixel_shuffle.cpp", + "kernels/portable/cpu/op_pixel_unshuffle.cpp", + "kernels/portable/cpu/op_pow.cpp", + "kernels/portable/cpu/op_prod.cpp", + "kernels/portable/cpu/op_reciprocal.cpp", + "kernels/portable/cpu/op_reflection_pad1d.cpp", + "kernels/portable/cpu/op_reflection_pad2d.cpp", + "kernels/portable/cpu/op_reflection_pad3d.cpp", + "kernels/portable/cpu/op_relu.cpp", + "kernels/portable/cpu/op_remainder.cpp", + "kernels/portable/cpu/op_repeat.cpp", + "kernels/portable/cpu/op_repeat_interleave.cpp", + "kernels/portable/cpu/op_replication_pad1d.cpp", + "kernels/portable/cpu/op_replication_pad2d.cpp", + "kernels/portable/cpu/op_replication_pad3d.cpp", + "kernels/portable/cpu/op_roll.cpp", + "kernels/portable/cpu/op_round.cpp", + "kernels/portable/cpu/op_rsqrt.cpp", + "kernels/portable/cpu/op_rsub.cpp", + "kernels/portable/cpu/op_scalar_tensor.cpp", + "kernels/portable/cpu/op_scatter.cpp", + "kernels/portable/cpu/op_scatter_add.cpp", + "kernels/portable/cpu/op_select_copy.cpp", + "kernels/portable/cpu/op_select_scatter.cpp", + "kernels/portable/cpu/op_sigmoid.cpp", + "kernels/portable/cpu/op_sign.cpp", + "kernels/portable/cpu/op_sin.cpp", + "kernels/portable/cpu/op_sinh.cpp", + "kernels/portable/cpu/op_slice_copy.cpp", + "kernels/portable/cpu/op_slice_scatter.cpp", + "kernels/portable/cpu/op_softmax.cpp", + "kernels/portable/cpu/op_split_copy.cpp", + "kernels/portable/cpu/op_split_with_sizes_copy.cpp", + "kernels/portable/cpu/op_sqrt.cpp", + "kernels/portable/cpu/op_squeeze_copy.cpp", + "kernels/portable/cpu/op_stack.cpp", + "kernels/portable/cpu/op_sub.cpp", + "kernels/portable/cpu/op_sum.cpp", + "kernels/portable/cpu/op_t_copy.cpp", + "kernels/portable/cpu/op_tan.cpp", + "kernels/portable/cpu/op_tanh.cpp", + "kernels/portable/cpu/op_to_copy.cpp", + "kernels/portable/cpu/op_topk.cpp", + "kernels/portable/cpu/op_transpose_copy.cpp", + "kernels/portable/cpu/op_tril.cpp", + "kernels/portable/cpu/op_trunc.cpp", + "kernels/portable/cpu/op_unbind_copy.cpp", + "kernels/portable/cpu/op_unsqueeze_copy.cpp", + "kernels/portable/cpu/op_upsample_bilinear2d.cpp", + "kernels/portable/cpu/op_upsample_nearest2d.cpp", + "kernels/portable/cpu/op_var.cpp", + "kernels/portable/cpu/op_view_copy.cpp", + "kernels/portable/cpu/op_where.cpp", + "kernels/portable/cpu/op_zeros.cpp", + "kernels/portable/cpu/pattern/unary_ufunc_realh.cpp", + "kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp", + "kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp", + "kernels/portable/cpu/util/activation_ops_util.cpp", + "kernels/portable/cpu/util/advanced_index_util.cpp", + "kernels/portable/cpu/util/broadcast_util.cpp", + "kernels/portable/cpu/util/copy_ops_util.cpp", + "kernels/portable/cpu/util/distance_util.cpp", + "kernels/portable/cpu/util/dtype_util.cpp", + "kernels/portable/cpu/util/index_util.cpp", + "kernels/portable/cpu/util/kernel_ops_util.cpp", + "kernels/portable/cpu/util/matmul_ops_util.cpp", + "kernels/portable/cpu/util/normalization_ops_util.cpp", + "kernels/portable/cpu/util/padding_util.cpp", + "kernels/portable/cpu/util/reduce_util.cpp", + "kernels/portable/cpu/util/repeat_util.cpp", + "kernels/portable/cpu/util/select_copy_util.cpp", + "kernels/portable/cpu/util/slice_util.cpp", + "kernels/portable/cpu/util/upsample_util.cpp", +] + +# _optimized_kernels__srcs +OPTIMIZED_KERNELS_SRCS = [ + "extension/parallel/thread_parallel.cpp", + "kernels/optimized/blas/BlasKernel.cpp", + "kernels/optimized/blas/CPUBlas.cpp", + "kernels/optimized/cpu/op_add.cpp", + "kernels/optimized/cpu/op_bmm.cpp", + "kernels/optimized/cpu/op_div.cpp", + "kernels/optimized/cpu/op_exp.cpp", + "kernels/optimized/cpu/op_fft_r2c.cpp", + "kernels/optimized/cpu/op_le.cpp", + "kernels/optimized/cpu/op_linear.cpp", + "kernels/optimized/cpu/op_mm.cpp", + "kernels/optimized/cpu/op_mul.cpp", + "kernels/optimized/cpu/op_native_layer_norm.cpp", + "kernels/optimized/cpu/op_neg.cpp", + "kernels/optimized/cpu/op_sigmoid.cpp", + "kernels/optimized/cpu/op_sub.cpp", +] + +# _quantized_kernels__srcs +QUANTIZED_KERNELS_SRCS = [ + "kernels/quantized/cpu/embeddingxb.cpp", + "kernels/quantized/cpu/op_add.cpp", + "kernels/quantized/cpu/op_choose_qparams.cpp", + "kernels/quantized/cpu/op_dequantize.cpp", + "kernels/quantized/cpu/op_embedding.cpp", + "kernels/quantized/cpu/op_embedding2b.cpp", + "kernels/quantized/cpu/op_embedding4b.cpp", + "kernels/quantized/cpu/op_mixed_linear.cpp", + "kernels/quantized/cpu/op_mixed_mm.cpp", + "kernels/quantized/cpu/op_quantize.cpp", +] + +# _program_schema__srcs +PROGRAM_SCHEMA_SRCS = [ + "schema/program.fbs", + "schema/scalar_type.fbs", +] + +# _optimized_cpublas__srcs +OPTIMIZED_CPUBLAS_SRCS = [ + "extension/parallel/thread_parallel.cpp", + "extension/threadpool/threadpool.cpp", + "extension/threadpool/threadpool_guard.cpp", + "kernels/optimized/blas/BlasKernel.cpp", + "kernels/optimized/blas/CPUBlas.cpp", +] + +# _optimized_native_cpu_ops_oss__srcs +OPTIMIZED_NATIVE_CPU_OPS_OSS_SRCS = [ + "codegen/templates/RegisterCodegenUnboxedKernels.cpp", + "codegen/templates/RegisterDispatchKeyCustomOps.cpp", + "codegen/templates/RegisterKernels.cpp", + "codegen/templates/RegisterSchema.cpp", + "extension/parallel/thread_parallel.cpp", + "extension/threadpool/threadpool.cpp", + "extension/threadpool/threadpool_guard.cpp", + "kernels/optimized/blas/BlasKernel.cpp", + "kernels/optimized/blas/CPUBlas.cpp", + "kernels/optimized/cpu/op_add.cpp", + "kernels/optimized/cpu/op_bmm.cpp", + "kernels/optimized/cpu/op_div.cpp", + "kernels/optimized/cpu/op_exp.cpp", + "kernels/optimized/cpu/op_fft_r2c.cpp", + "kernels/optimized/cpu/op_le.cpp", + "kernels/optimized/cpu/op_linear.cpp", + "kernels/optimized/cpu/op_mm.cpp", + "kernels/optimized/cpu/op_mul.cpp", + "kernels/optimized/cpu/op_native_layer_norm.cpp", + "kernels/optimized/cpu/op_neg.cpp", + "kernels/optimized/cpu/op_sigmoid.cpp", + "kernels/optimized/cpu/op_sub.cpp", +] + +# _extension_data_loader__srcs +EXTENSION_DATA_LOADER_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "extension/data_loader/mmap_data_loader.cpp", +] + +# _extension_module__srcs +EXTENSION_MODULE_SRCS = [ + "extension/module/module.cpp", +] + +# _extension_runner_util__srcs +EXTENSION_RUNNER_UTIL_SRCS = [ + "extension/runner_util/inputs.cpp", + "extension/runner_util/inputs_portable.cpp", +] + +# _extension_llm_runner__srcs +EXTENSION_LLM_RUNNER_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "extension/data_loader/mmap_data_loader.cpp", + "extension/llm/runner/text_decoder_runner.cpp", + "extension/llm/runner/text_prefiller.cpp", + "extension/llm/sampler/sampler.cpp", + "extension/tensor/tensor_ptr.cpp", + "extension/tensor/tensor_ptr_maker.cpp", +] + +# _extension_tensor__srcs +EXTENSION_TENSOR_SRCS = [ + "extension/tensor/tensor_ptr.cpp", + "extension/tensor/tensor_ptr_maker.cpp", +] + +# _extension_threadpool__srcs +EXTENSION_THREADPOOL_SRCS = [ + "extension/threadpool/threadpool.cpp", + "extension/threadpool/threadpool_guard.cpp", +] + +# _extension_training__srcs +EXTENSION_TRAINING_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "extension/data_loader/mmap_data_loader.cpp", + "extension/module/module.cpp", + "extension/training/module/training_module.cpp", + "extension/training/optimizer/sgd.cpp", + "kernels/prim_ops/et_copy_index.cpp", + "kernels/prim_ops/et_view.cpp", + "kernels/prim_ops/register_prim_ops.cpp", +] + +# _train_xor__srcs +TRAIN_XOR_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "extension/data_loader/mmap_data_loader.cpp", + "extension/flat_tensor/serialize/flat_tensor_header.cpp", + "extension/flat_tensor/serialize/serialize.cpp", + "extension/module/module.cpp", + "extension/tensor/tensor_ptr.cpp", + "extension/tensor/tensor_ptr_maker.cpp", + "extension/training/examples/XOR/train.cpp", + "extension/training/module/training_module.cpp", + "extension/training/optimizer/sgd.cpp", +] + +# _executor_runner__srcs +EXECUTOR_RUNNER_SRCS = [ + "examples/portable/executor_runner/executor_runner.cpp", + "extension/data_loader/file_data_loader.cpp", + "extension/evalue_util/print_evalue.cpp", + "extension/runner_util/inputs.cpp", + "extension/runner_util/inputs_portable.cpp", + "runtime/executor/test/test_backend_compiler_lib.cpp", +] + +# _size_test__srcs +SIZE_TEST_SRCS = [ + "extension/data_loader/file_data_loader.cpp", + "test/size_test.cpp", +] + +# _mps_executor_runner__srcs +MPS_EXECUTOR_RUNNER_SRCS = [ + "backends/apple/mps/runtime/MPSBackend.mm", + "backends/apple/mps/runtime/MPSCompiler.mm", + "backends/apple/mps/runtime/MPSDelegateHeader.mm", + "backends/apple/mps/runtime/MPSDevice.mm", + "backends/apple/mps/runtime/MPSExecutor.mm", + "backends/apple/mps/runtime/MPSGraphBuilder.mm", + "backends/apple/mps/runtime/MPSStream.mm", + "backends/apple/mps/runtime/operations/ActivationOps.mm", + "backends/apple/mps/runtime/operations/BinaryOps.mm", + "backends/apple/mps/runtime/operations/ClampOps.mm", + "backends/apple/mps/runtime/operations/ConstantOps.mm", + "backends/apple/mps/runtime/operations/ConvolutionOps.mm", + "backends/apple/mps/runtime/operations/IndexingOps.mm", + "backends/apple/mps/runtime/operations/LinearAlgebra.mm", + "backends/apple/mps/runtime/operations/NormalizationOps.mm", + "backends/apple/mps/runtime/operations/OperationUtils.mm", + "backends/apple/mps/runtime/operations/PadOps.mm", + "backends/apple/mps/runtime/operations/PoolingOps.mm", + "backends/apple/mps/runtime/operations/QuantDequant.mm", + "backends/apple/mps/runtime/operations/RangeOps.mm", + "backends/apple/mps/runtime/operations/ReduceOps.mm", + "backends/apple/mps/runtime/operations/ShapeOps.mm", + "backends/apple/mps/runtime/operations/UnaryOps.mm", + "devtools/bundled_program/bundled_program.cpp", + "devtools/etdump/emitter.cpp", + "devtools/etdump/etdump_flatcc.cpp", + "examples/apple/mps/executor_runner/mps_executor_runner.mm", + "extension/data_loader/file_data_loader.cpp", + "extension/evalue_util/print_evalue.cpp", + "extension/runner_util/inputs.cpp", + "extension/runner_util/inputs_portable.cpp", +] + +# _mps_backend__srcs +MPS_BACKEND_SRCS = [ + "backends/apple/mps/runtime/MPSBackend.mm", + "backends/apple/mps/runtime/MPSCompiler.mm", + "backends/apple/mps/runtime/MPSDelegateHeader.mm", + "backends/apple/mps/runtime/MPSDevice.mm", + "backends/apple/mps/runtime/MPSExecutor.mm", + "backends/apple/mps/runtime/MPSGraphBuilder.mm", + "backends/apple/mps/runtime/MPSStream.mm", + "backends/apple/mps/runtime/operations/ActivationOps.mm", + "backends/apple/mps/runtime/operations/BinaryOps.mm", + "backends/apple/mps/runtime/operations/ClampOps.mm", + "backends/apple/mps/runtime/operations/ConstantOps.mm", + "backends/apple/mps/runtime/operations/ConvolutionOps.mm", + "backends/apple/mps/runtime/operations/IndexingOps.mm", + "backends/apple/mps/runtime/operations/LinearAlgebra.mm", + "backends/apple/mps/runtime/operations/NormalizationOps.mm", + "backends/apple/mps/runtime/operations/OperationUtils.mm", + "backends/apple/mps/runtime/operations/PadOps.mm", + "backends/apple/mps/runtime/operations/PoolingOps.mm", + "backends/apple/mps/runtime/operations/QuantDequant.mm", + "backends/apple/mps/runtime/operations/RangeOps.mm", + "backends/apple/mps/runtime/operations/ReduceOps.mm", + "backends/apple/mps/runtime/operations/ShapeOps.mm", + "backends/apple/mps/runtime/operations/UnaryOps.mm", +] + +# _mps_schema__srcs +MPS_SCHEMA_SRCS = [ + "backends/apple/mps/serialization/schema.fbs", +] + +# _xnn_executor_runner__srcs +XNN_EXECUTOR_RUNNER_SRCS = [ + "examples/portable/executor_runner/executor_runner.cpp", + "extension/data_loader/file_data_loader.cpp", + "extension/evalue_util/print_evalue.cpp", + "extension/runner_util/inputs.cpp", + "extension/runner_util/inputs_portable.cpp", +] + +# _xnnpack_backend__srcs +XNNPACK_BACKEND_SRCS = [ + "backends/xnnpack/runtime/XNNCompiler.cpp", + "backends/xnnpack/runtime/XNNExecutor.cpp", + "backends/xnnpack/runtime/XNNHeader.cpp", + "backends/xnnpack/runtime/XNNPACKBackend.cpp", + "backends/xnnpack/runtime/profiling/XNNProfiler.cpp", + "extension/threadpool/threadpool.cpp", + "extension/threadpool/threadpool_guard.cpp", +] + +# _xnnpack_schema__srcs +XNNPACK_SCHEMA_SRCS = [ + "backends/xnnpack/serialization/runtime_schema.fbs", +] + +# _vulkan_schema__srcs +VULKAN_SCHEMA_SRCS = [ + "backends/vulkan/serialization/schema.fbs", +] + +# _custom_ops__srcs +CUSTOM_OPS_SRCS = [ + "extension/llm/custom_ops/op_fallback.cpp", + "extension/llm/custom_ops/op_fast_hadamard_transform.cpp", + "extension/llm/custom_ops/op_sdpa.cpp", + "extension/llm/custom_ops/op_update_cache.cpp", + "extension/llm/custom_ops/spinquant/fast_hadamard_transform.cpp", + "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_neon.c", + "kernels/portable/cpu/util/reduce_util.cpp", +] + +# _llama_runner__srcs +LLAMA_RUNNER_SRCS = [ + "examples/models/llama/runner/runner.cpp", + "examples/models/llama/tokenizer/llama_tiktoken.cpp", + "extension/evalue_util/print_evalue.cpp", + "extension/llm/runner/text_decoder_runner.cpp", + "extension/llm/runner/text_prefiller.cpp", + "extension/llm/sampler/sampler.cpp", + "extension/llm/tokenizer/bpe_tokenizer.cpp", + "extension/llm/tokenizer/tiktoken.cpp", + "extension/tensor/tensor_ptr.cpp", + "extension/tensor/tensor_ptr_maker.cpp", +] From bc3fd39b63e0b7dd9d9e37245255018a1720c0c6 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Sat, 8 Feb 2025 19:07:37 -0800 Subject: [PATCH 02/10] Update [ghstack-poisoned] --- CMakeLists.txt | 2 + build/Codegen.cmake | 107 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ca8d1bbbcf2..09fa8ea74f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ cmake_minimum_required(VERSION 3.19) project(executorch) +include(build/Codegen.cmake) include(build/Utils.cmake) include(CMakeDependentOption) @@ -384,6 +385,7 @@ if(NOT EXECUTORCH_SRCS_FILE) message(STATUS "executorch: Generating source lists") set(EXECUTORCH_SRCS_FILE "${CMAKE_CURRENT_BINARY_DIR}/executorch_srcs.cmake") extract_sources(${EXECUTORCH_SRCS_FILE}) + validate_build_variables() endif() # This file defines the `___srcs` variables used below. diff --git a/build/Codegen.cmake b/build/Codegen.cmake index 0d93590be99..45f5d48e391 100644 --- a/build/Codegen.cmake +++ b/build/Codegen.cmake @@ -215,22 +215,111 @@ function(merge_yaml) endfunction() function(append_filelist name outputvar) - set(_rootdir "${EXECUTORCH_ROOT}/") # configure_file adds its input to the list of CMAKE_RERUN dependencies configure_file( - ${PROJECT_SOURCE_DIR}/build/build_variables.bzl - ${PROJECT_BINARY_DIR}/build_variables.bzl - COPYONLY) + ${PROJECT_SOURCE_DIR}/build/build_variables.bzl + ${PROJECT_BINARY_DIR}/build_variables.bzl COPYONLY + ) execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "exec(open('${PROJECT_SOURCE_DIR}/build/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))" + COMMAND + "${PYTHON_EXECUTABLE}" -c + "exec(open('${PROJECT_SOURCE_DIR}/build/build_variables.bzl').read());print(';'.join(${name}))" WORKING_DIRECTORY "${_rootdir}" RESULT_VARIABLE _retval - OUTPUT_VARIABLE _tempvar) + OUTPUT_VARIABLE _tempvar + ERROR_VARIABLE _stderr + ) if(NOT _retval EQUAL 0) - message(FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl") + message( + FATAL_ERROR + "Failed to fetch filelist ${name} from build_variables.bzl with output ${_tempvar} and stderr ${_stderr}" + ) endif() string(REPLACE "\n" "" _tempvar "${_tempvar}") list(APPEND ${outputvar} ${_tempvar}) - set(${outputvar} "${${outputvar}}" PARENT_SCOPE) + set(${outputvar} + "${${outputvar}}" + PARENT_SCOPE + ) +endfunction() + +function(validate_build_variables) + include(${EXECUTORCH_SRCS_FILE}) + set(BUILD_VARIABLES_FILELISTS + EXECUTORCH_SRCS + EXECUTORCH_CORE_SRCS + PORTABLE_KERNELS_SRCS + OPTIMIZED_KERNELS_SRCS + QUANTIZED_KERNELS_SRCS + PROGRAM_SCHEMA_SRCS + OPTIMIZED_CPUBLAS_SRCS + OPTIMIZED_NATIVE_CPU_OPS_OSS_SRCS + EXTENSION_DATA_LOADER_SRCS + EXTENSION_MODULE_SRCS + EXTENSION_RUNNER_UTIL_SRCS + EXTENSION_LLM_RUNNER_SRCS + EXTENSION_TENSOR_SRCS + EXTENSION_THREADPOOL_SRCS + EXTENSION_TRAINING_SRCS + TRAIN_XOR_SRCS + EXECUTOR_RUNNER_SRCS + SIZE_TEST_SRCS + MPS_EXECUTOR_RUNNER_SRCS + MPS_BACKEND_SRCS + MPS_SCHEMA_SRCS + XNN_EXECUTOR_RUNNER_SRCS + XNNPACK_BACKEND_SRCS + XNNPACK_SCHEMA_SRCS + VULKAN_SCHEMA_SRCS + CUSTOM_OPS_SRCS + LLAMA_RUNNER_SRCS + ) + set(BUILD_VARIABLES_VARNAMES + _executorch__srcs + _executorch_core__srcs + _portable_kernels__srcs + _optimized_kernels__srcs + _quantized_kernels__srcs + _program_schema__srcs + _optimized_cpublas__srcs + _optimized_native_cpu_ops_oss__srcs + _extension_data_loader__srcs + _extension_module__srcs + _extension_runner_util__srcs + _extension_llm_runner__srcs + _extension_tensor__srcs + _extension_threadpool__srcs + _extension_training__srcs + _train_xor__srcs + _executor_runner__srcs + _size_test__srcs + _mps_executor_runner__srcs + _mps_backend__srcs + _mps_schema__srcs + _xnn_executor_runner__srcs + _xnnpack_backend__srcs + _xnnpack_schema__srcs + _vulkan_schema__srcs + _custom_ops__srcs + _llama_runner__srcs + ) + foreach(filelist_and_varname IN ZIP_LISTS BUILD_VARIABLES_FILELISTS + BUILD_VARIABLES_VARNAMES + ) + append_filelist( + ${filelist_and_varname_0} + "${filelist_and_varname_1}_from_build_variables" + ) + if(NOT ${filelist_and_varname_1} STREQUAL + ${filelist_and_varname_1}_from_build_variables + ) + message( + FATAL_ERROR + "Buck-generated ${filelist_and_varname_1} does not match hardcoded \ +${filelist_and_varname_0} in build_variables.bzl. Left: \ +${${filelist_and_varname_1}}\n \ +Right: ${${filelist_and_varname_1}_from_build_variables}" + ) + endif() + endforeach() endfunction() From 00ce92771a22f649ab5f6ac77cee1f14e4c0b2c6 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 10 Feb 2025 11:00:23 -0800 Subject: [PATCH 03/10] Update [ghstack-poisoned] --- .../custom_ops/spinquant/test/CMakeLists.txt | 30 +++++++++++++++++++ test/utils/OSSTestConfig.json | 11 +++++++ 2 files changed, 41 insertions(+) create mode 100644 extension/llm/custom_ops/spinquant/test/CMakeLists.txt diff --git a/extension/llm/custom_ops/spinquant/test/CMakeLists.txt b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt new file mode 100644 index 00000000000..63e8207ed0a --- /dev/null +++ b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# @generated by test/utils/generate_gtest_cmakelists.py +# +# This file should be formatted with +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ +# It should also be cmake-lint clean. +# + +cmake_minimum_required(VERSION 3.19) + +set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..) + +include(${EXECUTORCH_ROOT}/build/Test.cmake) + +set(_test_srcs + fast_hadamard_transform_test.cpp fast_hadamard_transform_test_impl.cpp + op_fast_hadamard_transform_test.cpp +) + +et_cxx_test( + extension_llm_custom_ops_spinquant_test SOURCES ${_test_srcs} EXTRA_LIBS + custom_ops +) diff --git a/test/utils/OSSTestConfig.json b/test/utils/OSSTestConfig.json index 2229b255401..12fa3c03658 100644 --- a/test/utils/OSSTestConfig.json +++ b/test/utils/OSSTestConfig.json @@ -1,4 +1,15 @@ { "tests": [ + { + "directory": "extension/llm/custom_ops/spinquant/test", + "sources": [ + "fast_hadamard_transform_test.cpp", + "fast_hadamard_transform_test_impl.cpp", + "op_fast_hadamard_transform_test.cpp" + ], + "additional_libs": [ + "custom_ops" + ] + }, { "directory": "extension/data_loader/test", "sources": [ From 6440d3debeec9a99c96f83c49f4b1dfa132053ee Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 10 Feb 2025 11:00:29 -0800 Subject: [PATCH 04/10] Update [ghstack-poisoned] --- build/cmake_deps.toml | 5 +---- extension/llm/custom_ops/CMakeLists.txt | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml index c7680708688..92ddc7d80c8 100644 --- a/build/cmake_deps.toml +++ b/build/cmake_deps.toml @@ -365,10 +365,7 @@ buck_targets = [ "//extension/llm/custom_ops:custom_ops", ] filters = [ - # Second clause is to pick up fht_neon.c/fht_avx.c from FFHT. TODO: - # remove filters and patch extract_sources.py's Buck query to fetch - # srcs; presumably filters is here to remove .h files. - "(.cpp$)|(fht.*\\.c$)", + ".cpp$", ] excludes = [ "^codegen", diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt index 16ca4fff805..3203cd32d57 100644 --- a/extension/llm/custom_ops/CMakeLists.txt +++ b/extension/llm/custom_ops/CMakeLists.txt @@ -45,6 +45,22 @@ list(APPEND custom_ops_libs cpuinfo) list(APPEND custom_ops_libs cpublas) list(APPEND custom_ops_libs eigen_blas) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv7)$") + list(APPEND _custom_ops__srcs + "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_neon.c" + ) +elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + list(APPEND _custom_ops__srcs + "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_avx.c" + ) +else() + message( + FATAL_ERROR + "Unsupported CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}. (If \ +32-bit x86, try using fht_avx.c and send a PR if it works!)" + ) +endif() + list(TRANSFORM _custom_ops__srcs PREPEND "${EXECUTORCH_ROOT}/") if(NOT EXECUTORCH_BUILD_XNNPACK) From 630bdd001574309ea50159e0a5f1586c0ca19e3f Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 10 Feb 2025 11:15:39 -0800 Subject: [PATCH 05/10] Update [ghstack-poisoned] --- build/build_variables.bzl | 1 - 1 file changed, 1 deletion(-) diff --git a/build/build_variables.bzl b/build/build_variables.bzl index 75f1792f6fa..c3d631a9b47 100644 --- a/build/build_variables.bzl +++ b/build/build_variables.bzl @@ -476,7 +476,6 @@ CUSTOM_OPS_SRCS = [ "extension/llm/custom_ops/op_sdpa.cpp", "extension/llm/custom_ops/op_update_cache.cpp", "extension/llm/custom_ops/spinquant/fast_hadamard_transform.cpp", - "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_neon.c", "kernels/portable/cpu/util/reduce_util.cpp", ] From 098432fdf33a6b94d62ae9fbe4915221aac10cba Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 10 Feb 2025 15:20:57 -0800 Subject: [PATCH 06/10] Update [ghstack-poisoned] --- build/build_variables.bzl | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/build/build_variables.bzl b/build/build_variables.bzl index c3d631a9b47..3bd0b8028ef 100644 --- a/build/build_variables.bzl +++ b/build/build_variables.bzl @@ -6,14 +6,12 @@ # file. (Remember that bzl files are not exported via ShipIt by default, so you may also # need to update ExecuTorch's ShipIt config.) -# _executorch__srcs EXECUTORCH_SRCS = [ "kernels/prim_ops/et_copy_index.cpp", "kernels/prim_ops/et_view.cpp", "kernels/prim_ops/register_prim_ops.cpp", ] -# _executorch_core__srcs EXECUTORCH_CORE_SRCS = [ "runtime/backend/interface.cpp", "runtime/core/evalue.cpp", @@ -35,7 +33,6 @@ EXECUTORCH_CORE_SRCS = [ "schema/extended_header.cpp", ] -# _portable_kernels__srcs PORTABLE_KERNELS_SRCS = [ "kernels/portable/cpu/op__empty_dim_order.cpp", "kernels/portable/cpu/op__to_dim_order_copy.cpp", @@ -215,7 +212,6 @@ PORTABLE_KERNELS_SRCS = [ "kernels/portable/cpu/util/upsample_util.cpp", ] -# _optimized_kernels__srcs OPTIMIZED_KERNELS_SRCS = [ "extension/parallel/thread_parallel.cpp", "kernels/optimized/blas/BlasKernel.cpp", @@ -235,7 +231,6 @@ OPTIMIZED_KERNELS_SRCS = [ "kernels/optimized/cpu/op_sub.cpp", ] -# _quantized_kernels__srcs QUANTIZED_KERNELS_SRCS = [ "kernels/quantized/cpu/embeddingxb.cpp", "kernels/quantized/cpu/op_add.cpp", @@ -249,13 +244,11 @@ QUANTIZED_KERNELS_SRCS = [ "kernels/quantized/cpu/op_quantize.cpp", ] -# _program_schema__srcs PROGRAM_SCHEMA_SRCS = [ "schema/program.fbs", "schema/scalar_type.fbs", ] -# _optimized_cpublas__srcs OPTIMIZED_CPUBLAS_SRCS = [ "extension/parallel/thread_parallel.cpp", "extension/threadpool/threadpool.cpp", @@ -264,7 +257,6 @@ OPTIMIZED_CPUBLAS_SRCS = [ "kernels/optimized/blas/CPUBlas.cpp", ] -# _optimized_native_cpu_ops_oss__srcs OPTIMIZED_NATIVE_CPU_OPS_OSS_SRCS = [ "codegen/templates/RegisterCodegenUnboxedKernels.cpp", "codegen/templates/RegisterDispatchKeyCustomOps.cpp", @@ -290,24 +282,20 @@ OPTIMIZED_NATIVE_CPU_OPS_OSS_SRCS = [ "kernels/optimized/cpu/op_sub.cpp", ] -# _extension_data_loader__srcs EXTENSION_DATA_LOADER_SRCS = [ "extension/data_loader/file_data_loader.cpp", "extension/data_loader/mmap_data_loader.cpp", ] -# _extension_module__srcs EXTENSION_MODULE_SRCS = [ "extension/module/module.cpp", ] -# _extension_runner_util__srcs EXTENSION_RUNNER_UTIL_SRCS = [ "extension/runner_util/inputs.cpp", "extension/runner_util/inputs_portable.cpp", ] -# _extension_llm_runner__srcs EXTENSION_LLM_RUNNER_SRCS = [ "extension/data_loader/file_data_loader.cpp", "extension/data_loader/mmap_data_loader.cpp", @@ -318,19 +306,16 @@ EXTENSION_LLM_RUNNER_SRCS = [ "extension/tensor/tensor_ptr_maker.cpp", ] -# _extension_tensor__srcs EXTENSION_TENSOR_SRCS = [ "extension/tensor/tensor_ptr.cpp", "extension/tensor/tensor_ptr_maker.cpp", ] -# _extension_threadpool__srcs EXTENSION_THREADPOOL_SRCS = [ "extension/threadpool/threadpool.cpp", "extension/threadpool/threadpool_guard.cpp", ] -# _extension_training__srcs EXTENSION_TRAINING_SRCS = [ "extension/data_loader/file_data_loader.cpp", "extension/data_loader/mmap_data_loader.cpp", @@ -342,7 +327,6 @@ EXTENSION_TRAINING_SRCS = [ "kernels/prim_ops/register_prim_ops.cpp", ] -# _train_xor__srcs TRAIN_XOR_SRCS = [ "extension/data_loader/file_data_loader.cpp", "extension/data_loader/mmap_data_loader.cpp", @@ -356,7 +340,6 @@ TRAIN_XOR_SRCS = [ "extension/training/optimizer/sgd.cpp", ] -# _executor_runner__srcs EXECUTOR_RUNNER_SRCS = [ "examples/portable/executor_runner/executor_runner.cpp", "extension/data_loader/file_data_loader.cpp", @@ -366,13 +349,11 @@ EXECUTOR_RUNNER_SRCS = [ "runtime/executor/test/test_backend_compiler_lib.cpp", ] -# _size_test__srcs SIZE_TEST_SRCS = [ "extension/data_loader/file_data_loader.cpp", "test/size_test.cpp", ] -# _mps_executor_runner__srcs MPS_EXECUTOR_RUNNER_SRCS = [ "backends/apple/mps/runtime/MPSBackend.mm", "backends/apple/mps/runtime/MPSCompiler.mm", @@ -407,7 +388,6 @@ MPS_EXECUTOR_RUNNER_SRCS = [ "extension/runner_util/inputs_portable.cpp", ] -# _mps_backend__srcs MPS_BACKEND_SRCS = [ "backends/apple/mps/runtime/MPSBackend.mm", "backends/apple/mps/runtime/MPSCompiler.mm", @@ -434,12 +414,10 @@ MPS_BACKEND_SRCS = [ "backends/apple/mps/runtime/operations/UnaryOps.mm", ] -# _mps_schema__srcs MPS_SCHEMA_SRCS = [ "backends/apple/mps/serialization/schema.fbs", ] -# _xnn_executor_runner__srcs XNN_EXECUTOR_RUNNER_SRCS = [ "examples/portable/executor_runner/executor_runner.cpp", "extension/data_loader/file_data_loader.cpp", @@ -448,7 +426,6 @@ XNN_EXECUTOR_RUNNER_SRCS = [ "extension/runner_util/inputs_portable.cpp", ] -# _xnnpack_backend__srcs XNNPACK_BACKEND_SRCS = [ "backends/xnnpack/runtime/XNNCompiler.cpp", "backends/xnnpack/runtime/XNNExecutor.cpp", @@ -459,17 +436,14 @@ XNNPACK_BACKEND_SRCS = [ "extension/threadpool/threadpool_guard.cpp", ] -# _xnnpack_schema__srcs XNNPACK_SCHEMA_SRCS = [ "backends/xnnpack/serialization/runtime_schema.fbs", ] -# _vulkan_schema__srcs VULKAN_SCHEMA_SRCS = [ "backends/vulkan/serialization/schema.fbs", ] -# _custom_ops__srcs CUSTOM_OPS_SRCS = [ "extension/llm/custom_ops/op_fallback.cpp", "extension/llm/custom_ops/op_fast_hadamard_transform.cpp", @@ -479,7 +453,6 @@ CUSTOM_OPS_SRCS = [ "kernels/portable/cpu/util/reduce_util.cpp", ] -# _llama_runner__srcs LLAMA_RUNNER_SRCS = [ "examples/models/llama/runner/runner.cpp", "examples/models/llama/tokenizer/llama_tiktoken.cpp", From d4aaa3633305f8e378d661db529830720f85bf42 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 10 Feb 2025 11:00:23 -0800 Subject: [PATCH 07/10] Update [ghstack-poisoned] --- .../custom_ops/spinquant/test/CMakeLists.txt | 30 +++++++++++++++++++ test/utils/OSSTestConfig.json | 11 +++++++ 2 files changed, 41 insertions(+) create mode 100644 extension/llm/custom_ops/spinquant/test/CMakeLists.txt diff --git a/extension/llm/custom_ops/spinquant/test/CMakeLists.txt b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt new file mode 100644 index 00000000000..63e8207ed0a --- /dev/null +++ b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# @generated by test/utils/generate_gtest_cmakelists.py +# +# This file should be formatted with +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ +# It should also be cmake-lint clean. +# + +cmake_minimum_required(VERSION 3.19) + +set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..) + +include(${EXECUTORCH_ROOT}/build/Test.cmake) + +set(_test_srcs + fast_hadamard_transform_test.cpp fast_hadamard_transform_test_impl.cpp + op_fast_hadamard_transform_test.cpp +) + +et_cxx_test( + extension_llm_custom_ops_spinquant_test SOURCES ${_test_srcs} EXTRA_LIBS + custom_ops +) diff --git a/test/utils/OSSTestConfig.json b/test/utils/OSSTestConfig.json index 5d53a72f6ae..b3eea132c74 100644 --- a/test/utils/OSSTestConfig.json +++ b/test/utils/OSSTestConfig.json @@ -1,4 +1,15 @@ { "tests": [ + { + "directory": "extension/llm/custom_ops/spinquant/test", + "sources": [ + "fast_hadamard_transform_test.cpp", + "fast_hadamard_transform_test_impl.cpp", + "op_fast_hadamard_transform_test.cpp" + ], + "additional_libs": [ + "custom_ops" + ] + }, { "directory": "extension/data_loader/test", "sources": [ From a826c409861adae71ed99f518e040b603a1b7104 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 12 Feb 2025 15:34:42 -0800 Subject: [PATCH 08/10] Update [ghstack-poisoned] --- test/run_oss_cpp_tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/test/run_oss_cpp_tests.sh b/test/run_oss_cpp_tests.sh index 59691fd138d..df042af268c 100755 --- a/test/run_oss_cpp_tests.sh +++ b/test/run_oss_cpp_tests.sh @@ -87,7 +87,6 @@ probe_additional_tests() { # CMakeLists.txt rules, that are buildable using build_and_run_test dirs=( examples/models/llama/tokenizer - extension/llm/custom_ops extension/llm/tokenizer ) From ff128f7cf9589529324f9a39cb4c9510805057aa Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 21 Feb 2025 14:34:16 -0800 Subject: [PATCH 09/10] Update [ghstack-poisoned] --- build/Utils.cmake | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/build/Utils.cmake b/build/Utils.cmake index 113f4829b86..a2c5ce94571 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -329,29 +329,25 @@ function(resolve_python_executable) endfunction() # find_package(Torch CONFIG REQUIRED) replacement for targets that have a -# header-only Torch dependency. Because find_package sets variables in the -# parent scope, we use a macro to preserve this rather than maintaining our own -# list of those variables. -macro(find_package_torch_headers) - # We cannot simply use CMAKE_FIND_ROOT_PATH_BOTH, because that does not - # propagate into TorchConfig.cmake. - foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) - set(OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} - ${CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}} - ) - set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} BOTH) - endforeach() - find_package_torch() - foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) - set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} - ${OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}} - ) - endforeach() -endmacro() +# header-only Torch dependency. +# +# Unlike find_package(Torch ...), this will only set +# TORCH_INCLUDE_DIRS in the parent scope. In particular, it will NOT +# set any of the following: +# - TORCH_FOUND +# - TORCH_LIBRARY +# - TORCH_CXX_FLAGS +function(find_package_torch_headers) + # We implement this way rather than using find_package so that + # cross-compilation can still use the host's installed copy of + # torch, since the headers should be fine. + get_torch_base_path(TORCH_BASE_PATH) + set(TORCH_INCLUDE_DIRS "${TORCH_BASE_PATH}/include;${TORCH_BASE_PATH}/include/torch/csrc/api/include" PARENT_SCOPE) +endfunction() -# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package -# can find Torch. -function(add_torch_to_cmake_prefix_path) +# Return the base path to the installed Torch Python library in +# outVar. +function(get_torch_base_path outVar) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() endif() @@ -370,6 +366,13 @@ function(add_torch_to_cmake_prefix_path) message("Output:\n${_tmp_torch_path}") message(FATAL_ERROR "Error:\n${_tmp_torch_path_error}") endif() + set(${outVar} ${_tmp_torch_path} PARENT_SCOPE) +endfunction() + +# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package +# can find Torch. +function(add_torch_to_cmake_prefix_path) + get_torch_base_path(_tmp_torch_path) list(APPEND CMAKE_PREFIX_PATH "${_tmp_torch_path}") set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH}" From 9c7b3d13f576ae6191f158ec27fb3c25334de991 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 24 Feb 2025 21:14:04 -0800 Subject: [PATCH 10/10] Update [ghstack-poisoned] --- build/build_android_llm_demo.sh | 6 ++++-- extension/android_test/setup.sh | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh index beee2250062..cb2d47fdeb3 100644 --- a/build/build_android_llm_demo.sh +++ b/build/build_android_llm_demo.sh @@ -40,10 +40,11 @@ build_android_native_library() { EXECUTORCH_BUILD_NEURON=OFF fi - cmake --trace . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ + cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI="${ANDROID_ABI}" \ -DANDROID_PLATFORM=android-26 \ + -DBUILD_TESTING=OFF \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ -DEXECUTORCH_BUILD_XNNPACK=ON \ @@ -69,10 +70,11 @@ build_android_native_library() { fi cmake --build "${CMAKE_OUT}" -j "${CMAKE_JOBS}" --target install --config "${EXECUTORCH_CMAKE_BUILD_TYPE}" - cmake --trace extension/android \ + cmake extension/android \ -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \ -DANDROID_ABI="${ANDROID_ABI}" \ -DANDROID_PLATFORM=android-26 \ + -DBUILD_TESTING=OFF \ -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ diff --git a/extension/android_test/setup.sh b/extension/android_test/setup.sh index 88c7cf8be84..c21d2c09623 100755 --- a/extension/android_test/setup.sh +++ b/extension/android_test/setup.sh @@ -18,9 +18,10 @@ build_native_library() { CMAKE_OUT="cmake-out-android-${ANDROID_ABI}" ANDROID_NDK="${ANDROID_NDK:-/opt/ndk}" EXECUTORCH_CMAKE_BUILD_TYPE="${EXECUTORCH_CMAKE_BUILD_TYPE:-Release}" - cmake --trace . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ + cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI="${ANDROID_ABI}" \ + -DBUILD_TESTING=OFF \ -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ @@ -33,9 +34,10 @@ build_native_library() { cmake --build "${CMAKE_OUT}" -j16 --target install - cmake --trace extension/android \ + cmake extension/android \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \ -DANDROID_ABI="${ANDROID_ABI}" \ + -DBUILD_TESTING=OFF \ -DCMAKE_INSTALL_PREFIX=c"${CMAKE_OUT}" \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_LLAMA_JNI=ON \