@@ -45,17 +45,45 @@ def read_version(file_path="version.txt"):
45
45
if version_suffix is None :
46
46
version_suffix = f"+git{ get_git_commit_id ()} "
47
47
48
- use_cpp = os .getenv ("USE_CPP" )
49
-
50
48
import platform
51
49
52
- build_macos_arm_auto = (
53
- use_cpp == "1"
54
- and platform .machine ().startswith ("arm64" )
55
- and platform .system () == "Darwin"
56
- )
50
+ ################################################################################
51
+ # Build Configuration - Environment Variables and Build Options
52
+ ################################################################################
53
+
54
+ # Core build toggles
55
+ use_cpp = os .getenv ("USE_CPP" , "1" )
56
+ use_cpu_kernels = os .getenv ("USE_CPU_KERNELS" , "0" ) == "1"
57
+
58
+ # Platform detection
59
+ is_arm64 = platform .machine ().startswith ("arm64" ) or platform .machine () == "aarch64"
60
+ is_macos = platform .system () == "Darwin"
61
+ is_linux = platform .system () == "Linux"
62
+
63
+ # Auto-enable experimental builds on ARM64 macOS when USE_CPP=1
64
+ build_macos_arm_auto = use_cpp == "1" and is_arm64 and is_macos
65
+
66
+ # Build configuration hierarchy and relationships:
67
+ #
68
+ # Level 1: USE_CPP (Primary gate)
69
+ # ├── "0" → Skip all C++ extensions (Python-only mode)
70
+ # └── "1" or unset (defaults to "1"); any value other than "0" → Build C++ extensions
71
+ #
72
+ # Level 2: Platform-specific optimizations
73
+ # ├── USE_CPU_KERNELS="1" + Linux → Include optimized CPU kernels (AVX512, etc.)
74
+ # └── ARM64 + macOS (and USE_CPP exactly "1") → Auto-enable experimental builds (build_macos_arm_auto)
75
+ #
76
+ # Level 3: Experimental builds (cmake-based)
77
+ # ├── BUILD_TORCHAO_EXPERIMENTAL="1" → Force experimental builds
78
+ # ├── build_macos_arm_auto → Auto-enable on ARM64 macOS
79
+ # └── When enabled, provides access to:
80
+ # ├── TORCHAO_BUILD_CPU_AARCH64 → ARM64 CPU kernels
81
+ # ├── TORCHAO_BUILD_KLEIDIAI → Kleidi AI library integration
82
+ # ├── TORCHAO_BUILD_EXPERIMENTAL_MPS → MPS acceleration (macOS only)
83
+ # ├── TORCHAO_ENABLE_ARM_NEON_DOT → ARM NEON dot product instructions
84
+ # ├── TORCHAO_ENABLE_ARM_I8MM → ARM 8-bit integer matrix multiply
85
+ # └── TORCHAO_PARALLEL_BACKEND → Backend selection (aten_openmp, executorch, etc.)
57
86
58
- use_cpp_kernels = os .getenv ("USE_CPP_KERNELS" , "0" ) == "1"
59
87
60
88
from torchao .utils import TORCH_VERSION_AT_LEAST_2_7
61
89
@@ -92,12 +120,10 @@ def __init__(self):
92
120
# can be built by explicitly setting TORCHAO_BUILD_CPU_AARCH64=1
93
121
self .build_cpu_aarch64 = self ._os_bool_var (
94
122
"TORCHAO_BUILD_CPU_AARCH64" ,
95
- default = (self . _is_arm64 () and self . _is_macos () ),
123
+ default = (is_arm64 and is_macos ),
96
124
)
97
125
if self .build_cpu_aarch64 :
98
- assert self ._is_arm64 (), (
99
- "TORCHAO_BUILD_CPU_AARCH64 requires an arm64 machine"
100
- )
126
+ assert is_arm64 , "TORCHAO_BUILD_CPU_AARCH64 requires an arm64 machine"
101
127
102
128
# TORCHAO_BUILD_KLEIDIAI is disabled by default for now because
103
129
# 1) It increases the build time
@@ -115,8 +141,8 @@ def __init__(self):
115
141
"TORCHAO_BUILD_EXPERIMENTAL_MPS" , default = False
116
142
)
117
143
if self .build_experimental_mps :
118
- assert self . _is_macos () , "TORCHAO_BUILD_EXPERIMENTAL_MPS requires MacOS "
119
- assert self . _is_arm64 () , "TORCHAO_BUILD_EXPERIMENTAL_MPS requires arm64"
144
+ assert is_macos , "TORCHAO_BUILD_EXPERIMENTAL_MPS requires macOS "
145
+ assert is_arm64 , "TORCHAO_BUILD_EXPERIMENTAL_MPS requires arm64"
120
146
assert torch .mps .is_available (), (
121
147
"TORCHAO_BUILD_EXPERIMENTAL_MPS requires MPS be available"
122
148
)
@@ -129,7 +155,7 @@ def __init__(self):
129
155
# Enabled by default on macOS silicon
130
156
self .enable_arm_neon_dot = self ._os_bool_var (
131
157
"TORCHAO_ENABLE_ARM_NEON_DOT" ,
132
- default = (self . _is_arm64 () and self . _is_macos () ),
158
+ default = (is_arm64 and is_macos ),
133
159
)
134
160
if self .enable_arm_neon_dot :
135
161
assert self .build_cpu_aarch64 , (
@@ -146,12 +172,6 @@ def __init__(self):
146
172
"TORCHAO_ENABLE_ARM_I8MM requires TORCHAO_BUILD_CPU_AARCH64 be set"
147
173
)
148
174
149
- def _is_arm64 (self ) -> bool :
150
- return platform .machine ().startswith ("arm64" ) or platform .machine () == "aarch64"
151
-
152
- def _is_macos (self ) -> bool :
153
- return platform .system () == "Darwin"
154
-
155
175
def _os_bool_var (self , var , default ) -> bool :
156
176
default_val = "1" if default else "0"
157
177
return os .getenv (var , default_val ) == "1"
@@ -323,6 +343,11 @@ def __init__(
323
343
324
344
325
345
def get_extensions ():
346
+ # Skip building C++ extensions if USE_CPP is set to "0"
347
+ if use_cpp == "0" :
348
+ print ("USE_CPP=0: Skipping compilation of C++ extensions" )
349
+ return []
350
+
326
351
debug_mode = use_debug_mode ()
327
352
if debug_mode :
328
353
print ("Compiling in debug mode" )
@@ -363,11 +388,7 @@ def get_extensions():
363
388
["-O3" if not debug_mode else "-O0" , "-fdiagnostics-color=always" ]
364
389
)
365
390
366
- if (
367
- use_cpp_kernels
368
- and platform .system () == "Linux"
369
- and TORCH_VERSION_AT_LEAST_2_7
370
- ):
391
+ if use_cpu_kernels and is_linux and TORCH_VERSION_AT_LEAST_2_7 :
371
392
if torch ._C ._cpu ._is_avx512_supported ():
372
393
extra_compile_args ["cxx" ].extend (
373
394
[
@@ -427,7 +448,7 @@ def get_extensions():
427
448
428
449
# Collect C++ source files
429
450
sources = list (glob .glob (os .path .join (extensions_dir , "**/*.cpp" ), recursive = True ))
430
- if not use_cpp_kernels or platform . system () != "Linux" :
451
+ if not use_cpu_kernels or not is_linux :
431
452
# Remove csrc/cpu/*.cpp
432
453
excluded_sources = list (
433
454
glob .glob (os .path .join (extensions_dir , "cpu/*.cpp" ), recursive = True )
@@ -652,7 +673,9 @@ def bool_to_on_off(value):
652
673
return ext_modules
653
674
654
675
655
- check_submodules ()
676
+ # Only check submodules if we're going to build C++ extensions
677
+ if use_cpp != "0" :
678
+ check_submodules ()
656
679
657
680
setup (
658
681
name = "torchao" ,
0 commit comments