diff --git a/.ci/scripts/build_llama_android.sh b/.ci/scripts/build_llama_android.sh index 7d3370ee561..99d03041b17 100644 --- a/.ci/scripts/build_llama_android.sh +++ b/.ci/scripts/build_llama_android.sh @@ -28,7 +28,7 @@ install_executorch_and_backend_lib() { -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DXNNPACK_ENABLE_ARM_BF16=OFF \ -Bcmake-android-out . @@ -47,7 +47,7 @@ build_llama_runner() { -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -Bcmake-android-out/examples/models/llama2 examples/models/llama2 cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index 12ba9ba52db..ebf9f09d240 100644 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -110,7 +110,7 @@ cmake_install_executorch_libraries() { -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \ + -DEXECUTORCH_BUILD_EXTENSION_LLM="$CUSTOM" \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \ @@ -129,7 +129,7 @@ cmake_build_llama_runner() { retry cmake \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DCMAKE_BUILD_TYPE=Debug \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \ + -DEXECUTORCH_BUILD_EXTENSION_LLM="$CUSTOM" \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh index 8ac87b2302d..fb214e4ec65 100644 --- a/.ci/scripts/test_llava.sh +++ b/.ci/scripts/test_llava.sh @@ -37,7 +37,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON \ @@ -68,7 +68,7 @@ LLAVA_COMMON_CMAKE_ARGS=" \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON" diff --git a/.ci/scripts/test_phi_3_mini.sh b/.ci/scripts/test_phi_3_mini.sh index 40767013e23..05f1efa6b8a 100644 --- a/.ci/scripts/test_phi_3_mini.sh +++ b/.ci/scripts/test_phi_3_mini.sh @@ -32,7 +32,7 @@ cmake_install_executorch_libraries() { -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -B${BUILD_DIR} . cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE} @@ -42,7 +42,7 @@ cmake_build_phi_3_mini() { cmake -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \ -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index d7130561fa6..c516cb28e26 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -371,7 +371,7 @@ jobs: -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON \ @@ -384,7 +384,7 @@ jobs: cmake \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 34fed923529..42dc792560b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,14 +165,14 @@ option(EXECUTORCH_BUILD_ARM_BAREMETAL option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF) -option(EXECUTORCH_BUILD_KERNELS_CUSTOM "Build the custom kernels" OFF) - -option(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT "Build the custom ops lib for AOT" +option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension" OFF ) -option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension" - OFF +option(EXECUTORCH_BUILD_EXTENSION_LLM "Build the LLM extension" OFF) + +option(EXECUTORCH_BUILD_EXTENSION_LLM_AOT + "Build the LLM extension custom ops lib for AOT" OFF ) option(EXECUTORCH_BUILD_EXTENSION_MODULE "Build the Module extension" OFF) @@ -229,12 +229,12 @@ cmake_dependent_option( "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF ) -if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT) +if(EXECUTORCH_BUILD_EXTENSION_LLM_AOT) set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) - set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON) + set(EXECUTORCH_BUILD_EXTENSION_LLM ON) endif() -if(EXECUTORCH_BUILD_KERNELS_CUSTOM) +if(EXECUTORCH_BUILD_EXTENSION_LLM) set(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) endif() @@ -786,10 +786,9 @@ if(EXECUTORCH_BUILD_PYBIND) ) endif() -if(EXECUTORCH_BUILD_KERNELS_CUSTOM) - # TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops) -endif() +if(EXECUTORCH_BUILD_EXTENSION_LLM) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops) + endif() if(EXECUTORCH_BUILD_KERNELS_QUANTIZED) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized) diff --git a/backends/cadence/build_cadence_xtensa.sh b/backends/cadence/build_cadence_xtensa.sh index eebd0707d19..cc08a9c282e 100644 --- a/backends/cadence/build_cadence_xtensa.sh +++ b/backends/cadence/build_cadence_xtensa.sh @@ -46,7 +46,7 @@ if $STEPWISE_BUILD; then -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \ -DEXECUTORCH_USE_DL=OFF \ -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=OFF \ -DPYTHON_EXECUTABLE=python3 \ -DEXECUTORCH_NNLIB_OPT=ON \ -DEXECUTORCH_BUILD_GFLAGS=ON \ @@ -74,7 +74,7 @@ else -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \ -DEXECUTORCH_USE_DL=OFF \ -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=OFF \ -DPYTHON_EXECUTABLE=python3 \ -DEXECUTORCH_NNLIB_OPT=ON \ -DHAVE_FNMATCH_H=OFF \ diff --git a/build/Utils.cmake b/build/Utils.cmake index 3ea616d5900..373652ff144 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -50,8 +50,8 @@ function(executorch_print_configuration_summary) STATUS " EXECUTORCH_BUILD_COREML : ${EXECUTORCH_BUILD_COREML}" ) - message(STATUS " EXECUTORCH_BUILD_KERNELS_CUSTOM : " - "${EXECUTORCH_BUILD_KERNELS_CUSTOM}" + message(STATUS " EXECUTORCH_BUILD_EXTENSION_LLM : " + "${EXECUTORCH_BUILD_EXTENSION_LLM}" ) message(STATUS " EXECUTORCH_BUILD_EXECUTOR_RUNNER : " "${EXECUTORCH_BUILD_EXECUTOR_RUNNER}" @@ -68,7 +68,7 @@ function(executorch_print_configuration_summary) message(STATUS " EXECUTORCH_BUILD_EXTENSION_TENSOR : " "${EXECUTORCH_BUILD_EXTENSION_TENSOR}" ) - message(STATUS " EXECUTORCH_BUILD_EXTENSION_TRAINING : " + message(STATUS " EXECUTORCH_BUILD_EXTENSION_TRAINING : " "${EXECUTORCH_BUILD_EXTENSION_TRAINING}" ) message( diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh index 42034c254f4..df4b0008f1f 100644 --- a/build/build_android_llm_demo.sh +++ b/build/build_android_llm_demo.sh @@ -41,7 +41,7 @@ build_android_native_library() { -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_QNN="${EXECUTORCH_BUILD_QNN}" \ -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \ -DCMAKE_BUILD_TYPE=Release \ @@ -61,7 +61,7 @@ build_android_native_library() { -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_LLAMA_JNI=ON \ -DCMAKE_BUILD_TYPE=Release \ -B"${CMAKE_OUT}"/extension/android diff --git a/build/build_apple_frameworks.sh b/build/build_apple_frameworks.sh index 9f111605de5..56bad69b1c2 100755 --- a/build/build_apple_frameworks.sh +++ b/build/build_apple_frameworks.sh @@ -168,7 +168,7 @@ cmake_build() { -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=$CUSTOM \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=$CUSTOM \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=$OPTIMIZED \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=$QUANTIZED \ -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY="$(pwd)" \ diff --git a/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md b/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md index ac95fb21bd8..884f40aba94 100644 --- a/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md +++ b/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md @@ -59,7 +59,7 @@ llama3/Meta-Llama-3-8B-Instruct/tokenizer.model -p -c -c bool: @classmethod def llama_custom_ops(cls) -> bool: - return cls._is_env_enabled("EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT", default=True) + return cls._is_env_enabled("EXECUTORCH_BUILD_EXTENSION_LLM_AOT", default=True) @classmethod def flatc(cls) -> bool: @@ -542,8 +542,8 @@ def run(self): if ShouldBuild.llama_custom_ops(): cmake_args += [ - "-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON", # add llama sdpa ops to pybindings. - "-DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=ON", + "-DEXECUTORCH_BUILD_EXTENSION_LLM=ON", # add llama sdpa ops to pybindings. + "-DEXECUTORCH_BUILD_EXTENSION_LLM_AOT=ON", "-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON", # add quantized ops to pybindings. "-DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON", ]