diff --git a/.ci/scripts/build_llama_android.sh b/.ci/scripts/build_llama_android.sh
index 7d3370ee561..99d03041b17 100644
--- a/.ci/scripts/build_llama_android.sh
+++ b/.ci/scripts/build_llama_android.sh
@@ -28,7 +28,7 @@ install_executorch_and_backend_lib() {
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -DXNNPACK_ENABLE_ARM_BF16=OFF \
     -Bcmake-android-out .
 
@@ -47,7 +47,7 @@ build_llama_runner() {
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -Bcmake-android-out/examples/models/llama2 examples/models/llama2
 
     cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 12ba9ba52db..ebf9f09d240 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -110,7 +110,7 @@ cmake_install_executorch_libraries() {
         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM="$CUSTOM" \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
         -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
@@ -129,7 +129,7 @@ cmake_build_llama_runner() {
     retry cmake \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE=Debug \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM="$CUSTOM" \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
         -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
         -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh
index 8ac87b2302d..fb214e4ec65 100644
--- a/.ci/scripts/test_llava.sh
+++ b/.ci/scripts/test_llava.sh
@@ -37,7 +37,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS="                      \
         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON      \
         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON      \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON        \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON         \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON     \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON     \
         -DEXECUTORCH_BUILD_XNNPACK=ON               \
@@ -68,7 +68,7 @@ LLAVA_COMMON_CMAKE_ARGS="                        \
         -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
         -DCMAKE_INSTALL_PREFIX=${BUILD_DIR}      \
         -DCMAKE_BUILD_TYPE=${BUILD_TYPE}         \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON     \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON      \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON  \
         -DEXECUTORCH_BUILD_XNNPACK=ON"
 
diff --git a/.ci/scripts/test_phi_3_mini.sh b/.ci/scripts/test_phi_3_mini.sh
index 40767013e23..05f1efa6b8a 100644
--- a/.ci/scripts/test_phi_3_mini.sh
+++ b/.ci/scripts/test_phi_3_mini.sh
@@ -32,7 +32,7 @@ cmake_install_executorch_libraries() {
       -DEXECUTORCH_BUILD_XNNPACK=ON \
       -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
       -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-      -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
       -B${BUILD_DIR} .
 
   cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
@@ -42,7 +42,7 @@ cmake_build_phi_3_mini() {
   cmake -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
       -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
       -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-      -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
       -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
       -DEXECUTORCH_BUILD_XNNPACK=ON \
       -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index d7130561fa6..c516cb28e26 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -371,7 +371,7 @@ jobs:
             -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
             -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
             -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+            -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
             -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
             -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
             -DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -384,7 +384,7 @@ jobs:
         cmake \
             -DCMAKE_INSTALL_PREFIX=cmake-out \
             -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+            -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
             -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
             -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
             -DEXECUTORCH_BUILD_XNNPACK=ON \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 34fed923529..42dc792560b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -165,14 +165,14 @@ option(EXECUTORCH_BUILD_ARM_BAREMETAL
 
 option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
 
-option(EXECUTORCH_BUILD_KERNELS_CUSTOM "Build the custom kernels" OFF)
-
-option(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT "Build the custom ops lib for AOT"
+option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
        OFF
 )
 
-option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
-       OFF
+option(EXECUTORCH_BUILD_EXTENSION_LLM "Build the LLM extension" OFF)
+
+option(EXECUTORCH_BUILD_EXTENSION_LLM_AOT
+       "Build the LLM extension custom ops lib for AOT" OFF
 )
 
 option(EXECUTORCH_BUILD_EXTENSION_MODULE "Build the Module extension" OFF)
@@ -229,12 +229,12 @@ cmake_dependent_option(
   "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
 )
 
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
+if(EXECUTORCH_BUILD_EXTENSION_LLM_AOT)
   set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
-  set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON)
+  set(EXECUTORCH_BUILD_EXTENSION_LLM ON)
 endif()
 
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
+if(EXECUTORCH_BUILD_EXTENSION_LLM)
   set(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
 endif()
 
@@ -786,10 +786,9 @@ if(EXECUTORCH_BUILD_PYBIND)
   )
 endif()
 
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
-  # TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops)
-endif()
+if(EXECUTORCH_BUILD_EXTENSION_LLM)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops)
+endif()
 
 if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
diff --git a/backends/cadence/build_cadence_xtensa.sh b/backends/cadence/build_cadence_xtensa.sh
index eebd0707d19..cc08a9c282e 100644
--- a/backends/cadence/build_cadence_xtensa.sh
+++ b/backends/cadence/build_cadence_xtensa.sh
@@ -46,7 +46,7 @@ if $STEPWISE_BUILD; then
         -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
         -DEXECUTORCH_USE_DL=OFF \
         -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=OFF \
         -DPYTHON_EXECUTABLE=python3 \
         -DEXECUTORCH_NNLIB_OPT=ON \
         -DEXECUTORCH_BUILD_GFLAGS=ON \
@@ -74,7 +74,7 @@ else
         -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
         -DEXECUTORCH_USE_DL=OFF \
         -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=OFF \
         -DPYTHON_EXECUTABLE=python3 \
         -DEXECUTORCH_NNLIB_OPT=ON \
         -DHAVE_FNMATCH_H=OFF \
diff --git a/build/Utils.cmake b/build/Utils.cmake
index 3ea616d5900..373652ff144 100644
--- a/build/Utils.cmake
+++ b/build/Utils.cmake
@@ -50,8 +50,8 @@ function(executorch_print_configuration_summary)
     STATUS
       "  EXECUTORCH_BUILD_COREML                : ${EXECUTORCH_BUILD_COREML}"
   )
-  message(STATUS "  EXECUTORCH_BUILD_KERNELS_CUSTOM        : "
-                 "${EXECUTORCH_BUILD_KERNELS_CUSTOM}"
+  message(STATUS "  EXECUTORCH_BUILD_EXTENSION_LLM         : "
+                 "${EXECUTORCH_BUILD_EXTENSION_LLM}"
   )
   message(STATUS "  EXECUTORCH_BUILD_EXECUTOR_RUNNER       : "
                  "${EXECUTORCH_BUILD_EXECUTOR_RUNNER}"
@@ -68,7 +68,7 @@ function(executorch_print_configuration_summary)
   message(STATUS "  EXECUTORCH_BUILD_EXTENSION_TENSOR      : "
                  "${EXECUTORCH_BUILD_EXTENSION_TENSOR}"
   )
-  message(STATUS "  EXECUTORCH_BUILD_EXTENSION_TRAINING      : "
+  message(STATUS "  EXECUTORCH_BUILD_EXTENSION_TRAINING    : "
                  "${EXECUTORCH_BUILD_EXTENSION_TRAINING}"
   )
   message(
diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh
index 42034c254f4..df4b0008f1f 100644
--- a/build/build_android_llm_demo.sh
+++ b/build/build_android_llm_demo.sh
@@ -41,7 +41,7 @@ build_android_native_library() {
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -DEXECUTORCH_BUILD_QNN="${EXECUTORCH_BUILD_QNN}" \
     -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
     -DCMAKE_BUILD_TYPE=Release \
@@ -61,7 +61,7 @@ build_android_native_library() {
     -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_LOG_LEVEL=Info \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -DEXECUTORCH_BUILD_LLAMA_JNI=ON \
     -DCMAKE_BUILD_TYPE=Release \
     -B"${CMAKE_OUT}"/extension/android
diff --git a/build/build_apple_frameworks.sh b/build/build_apple_frameworks.sh
index 9f111605de5..56bad69b1c2 100755
--- a/build/build_apple_frameworks.sh
+++ b/build/build_apple_frameworks.sh
@@ -168,7 +168,7 @@ cmake_build() {
         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=$CUSTOM \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=$CUSTOM \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=$OPTIMIZED \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=$QUANTIZED \
         -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY="$(pwd)" \
diff --git a/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md b/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md
index ac95fb21bd8..884f40aba94 100644
--- a/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md
+++ b/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md
@@ -59,7 +59,7 @@ llama3/Meta-Llama-3-8B-Instruct/tokenizer.model -p <path_to_params.json> -c <pat
         -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
         -Bcmake-android-out .
 
     cmake --build cmake-android-out -j16 --target install --config Release
@@ -75,7 +75,7 @@ llama3/Meta-Llama-3-8B-Instruct/tokenizer.model -p <path_to_params.json> -c <pat
         -DEXECUTORCH_BUILD_QNN=ON \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
         -Bcmake-android-out/examples/models/llama2 examples/models/llama2
 
     cmake --build cmake-android-out/examples/models/llama2 -j16 --config Release
diff --git a/examples/demo-apps/android/LlamaDemo/docs/delegates/qualcomm_README.md b/examples/demo-apps/android/LlamaDemo/docs/delegates/qualcomm_README.md
index 6513d976d19..29566c378ab 100644
--- a/examples/demo-apps/android/LlamaDemo/docs/delegates/qualcomm_README.md
+++ b/examples/demo-apps/android/LlamaDemo/docs/delegates/qualcomm_README.md
@@ -64,7 +64,7 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -Bcmake-out .
 cmake --build cmake-out -j16 --target install --config Release
 ```
@@ -81,7 +81,7 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_QNN=ON \
     -Bcmake-out/examples/models/llama2 \
diff --git a/examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh b/examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh
index df70725942d..e1b1b99790c 100644
--- a/examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh
+++ b/examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh
@@ -37,7 +37,7 @@ cmake extension/android \
   -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
   -DEXECUTORCH_BUILD_LLAMA_JNI=ON \
   -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
   -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
   -DCMAKE_BUILD_TYPE=Release \
   -B"${CMAKE_OUT}"/extension/android
diff --git a/examples/demo-apps/android/LlamaDemo/setup.sh b/examples/demo-apps/android/LlamaDemo/setup.sh
index b89c1829944..de851e0997f 100644
--- a/examples/demo-apps/android/LlamaDemo/setup.sh
+++ b/examples/demo-apps/android/LlamaDemo/setup.sh
@@ -20,7 +20,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
   -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
   -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
   -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
   -DCMAKE_BUILD_TYPE=Release \
   -B"${CMAKE_OUT}"
 
@@ -37,7 +37,7 @@ cmake extension/android \
   -DANDROID_PLATFORM=android-23 \
   -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
   -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
   -DEXECUTORCH_BUILD_LLAMA_JNI=ON \
   -DCMAKE_BUILD_TYPE=Release \
   -B"${CMAKE_OUT}"/extension/android
diff --git a/examples/llm_manual/build/schema/include/executorch/backends/xnnpack/serialization/schema_generated.h b/examples/llm_manual/build/schema/include/executorch/backends/xnnpack/serialization/schema_generated.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/examples/models/llama2/CMakeLists.txt b/examples/models/llama2/CMakeLists.txt
index 7a9b69d65b1..ba27cf1c4f3 100644
--- a/examples/models/llama2/CMakeLists.txt
+++ b/examples/models/llama2/CMakeLists.txt
@@ -83,7 +83,7 @@ if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
 endif()
 
 # custom ops library
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
+if(EXECUTORCH_BUILD_EXTENSION_LLM)
   add_subdirectory(
     ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/custom_ops
     ${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/custom_ops
@@ -116,7 +116,7 @@ endif()
 target_link_options_shared_lib(quantized_ops_lib)
 list(APPEND link_libraries quantized_kernels quantized_ops_lib)
 
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
+if(EXECUTORCH_BUILD_EXTENSION_LLM)
   target_link_options_shared_lib(custom_ops)
   list(APPEND link_libraries custom_ops)
 endif()
diff --git a/examples/models/llama2/README.md b/examples/models/llama2/README.md
index e6d247a1d30..fa0cb05b03f 100644
--- a/examples/models/llama2/README.md
+++ b/examples/models/llama2/README.md
@@ -291,7 +291,7 @@ The Wikitext results generated above used: `{max_seq_len: 2048, limit: 1000}`
         -DEXECUTORCH_BUILD_XNNPACK=ON \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
         -Bcmake-out .
 
     cmake --build cmake-out -j16 --target install --config Release
@@ -303,7 +303,7 @@ Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the
     cmake -DPYTHON_EXECUTABLE=python \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
         -DEXECUTORCH_BUILD_XNNPACK=ON \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
@@ -345,7 +345,7 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -Bcmake-out-android .
 
 cmake --build cmake-out-android -j16 --target install --config Release
@@ -362,7 +362,7 @@ cmake  -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -Bcmake-out-android/examples/models/llama2 \
     examples/models/llama2
 
diff --git a/examples/models/llava/CMakeLists.txt b/examples/models/llava/CMakeLists.txt
index c36e39a04cb..a1df5cae215 100644
--- a/examples/models/llava/CMakeLists.txt
+++ b/examples/models/llava/CMakeLists.txt
@@ -95,7 +95,7 @@ if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
 endif()
 
 # custom ops library
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
+if(EXECUTORCH_BUILD_EXTENSION_LLM)
   add_subdirectory(
     ${EXECUTORCH_ROOT}/extension/llm/custom_ops
     ${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/custom_ops
@@ -132,7 +132,7 @@ endif()
 target_link_options_shared_lib(quantized_ops_lib)
 list(APPEND link_libraries quantized_kernels quantized_ops_lib)
 
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
+if(EXECUTORCH_BUILD_EXTENSION_LLM)
   target_link_options_shared_lib(custom_ops)
   list(APPEND link_libraries custom_ops)
 endif()
diff --git a/examples/models/phi-3-mini/README.md b/examples/models/phi-3-mini/README.md
index e5a86c41777..b0e6a04fd5a 100644
--- a/examples/models/phi-3-mini/README.md
+++ b/examples/models/phi-3-mini/README.md
@@ -32,7 +32,7 @@ python -m examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-m
      -DEXECUTORCH_BUILD_XNNPACK=ON \
      -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
      -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+     -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
      -Bcmake-out .
 
  cmake --build cmake-out -j16 --target install --config Release
@@ -42,7 +42,7 @@ python -m examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-m
 cmake -DPYTHON_EXECUTABLE=python \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt
index 62cfa54558e..0e001f0781b 100644
--- a/extension/android/CMakeLists.txt
+++ b/extension/android/CMakeLists.txt
@@ -80,7 +80,7 @@ if(TARGET vulkan_backend)
   list(APPEND link_libraries vulkan_backend)
 endif()
 
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
+if(EXECUTORCH_BUILD_EXTENSION_LLM)
   add_subdirectory(
     ${EXECUTORCH_ROOT}/extension/llm/custom_ops
     ${CMAKE_CURRENT_BINARY_DIR}/../../extension/llm/custom_ops
diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt
index d42e37f9bd0..837f3deb1d3 100644
--- a/extension/llm/custom_ops/CMakeLists.txt
+++ b/extension/llm/custom_ops/CMakeLists.txt
@@ -69,7 +69,7 @@ target_compile_options(
 
 install(TARGETS custom_ops DESTINATION lib)
 
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
+if(EXECUTORCH_BUILD_EXTENSION_LLM_AOT)
   # Add a AOT library
   find_package(Torch CONFIG REQUIRED)
   add_library(
diff --git a/setup.py b/setup.py
index f6adb4f86c3..a72ccd05a46 100644
--- a/setup.py
+++ b/setup.py
@@ -88,7 +88,7 @@ def pybindings(cls) -> bool:
 
     @classmethod
     def llama_custom_ops(cls) -> bool:
-        return cls._is_env_enabled("EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT", default=True)
+        return cls._is_env_enabled("EXECUTORCH_BUILD_EXTENSION_LLM_AOT", default=True)
 
     @classmethod
     def flatc(cls) -> bool:
@@ -542,8 +542,8 @@ def run(self):
 
         if ShouldBuild.llama_custom_ops():
             cmake_args += [
-                "-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON",  # add llama sdpa ops to pybindings.
-                "-DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=ON",
+                "-DEXECUTORCH_BUILD_EXTENSION_LLM=ON",  # add llama sdpa ops to pybindings.
+                "-DEXECUTORCH_BUILD_EXTENSION_LLM_AOT=ON",
                 "-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON",  # add quantized ops to pybindings.
                 "-DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON",
             ]