Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit c04464f

Browse files
authored
Merge pull request #165 from facebookresearch/dev-rebased
Dev rebased
2 parents e12be58 + 28e6531 commit c04464f

File tree

85 files changed

+1839
-1298
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

85 files changed

+1839
-1298
lines changed

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
[submodule "third-party/islpp"]
33
path = third-party/islpp
44
url = https://github.com/nicolasvasilache/isl.git
5-
branch = ntv_dev
5+
branch = ntv_dev_cpp
66
[submodule "third-party/cub"]
77
path = third-party/cub
88
url = https://github.com/nicolasvasilache/cub.git

CMakeLists.txt

Lines changed: 52 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -104,36 +104,44 @@ link_directories(${LLVM_LIBDIR})
104104
# TODO: check if this respects CMAKE_PREFIX_PATH
105105
find_package(Protobuf REQUIRED)
106106

107-
# cuda
108-
find_package(CUDA REQUIRED)
109-
include_directories(${CUDA_TOOLKIT_ROOT_DIR}/include)
110-
111-
# modified CUB
112-
find_path(CUB_INCLUDE_DIR NAMES cub)
113-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDA_HOME=\"\\\"${CUDA_INCLUDE_DIRS}\\\"\" -DCUB_HOME=\"\\\"${CUB_INCLUDE_DIR}\\\"\" ")
114-
115-
# Inherited from Torch, see
116-
# https://github.com/torch/cutorch/blob/master/lib/THC/cmake/select_compute_arch.cmake
117-
INCLUDE(cmake/select_compute_arch.cmake)
118-
CUDA_SELECT_NVCC_ARCH_FLAGS(NVCC_FLAGS_EXTRA)
119-
120-
# TODO: Investigate these
121-
set (CUDA_VERBOSE_BUILD ON)
122-
set (CUDA_PROPAGATE_HOST_FLAGS ON)
123-
124-
################################################################################
125-
# FindCUDA doesn't find all the libraries we need, add the extra ones
126-
# Cribbed from /lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib
127107
################################################################################
128-
find_library(CUDA_CUDA_LIBRARIES cuda
129-
PATHS ${CUDA_TOOLKIT_ROOT_DIR}
130-
PATH_SUFFIXES lib lib64 targets/x86_64-linux/lib targets/x86_64-linux/lib/stubs)
131108

132-
set(CUDA_CUDA_LIBRARY ${CUDA_CUDA_LIBRARIES})
133-
134-
# Needed to build C2 and some of our tests
135-
INCLUDE(cmake/FindCuDNN.cmake)
136-
include_directories(${CUDNN_INCLUDE_DIR})
109+
# cuda
110+
set(WITH_CUDA ON CACHE BOOL "Whether to build with CUDA support")
111+
112+
if(WITH_CUDA)
113+
find_package(CUDA REQUIRED)
114+
include_directories(${CUDA_TOOLKIT_ROOT_DIR}/include)
115+
116+
# modified CUB
117+
find_path(CUB_INCLUDE_DIR NAMES cub)
118+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWITH_CUDA -DCUDA_HOME=\"\\\"${CUDA_INCLUDE_DIRS}\\\"\" -DCUB_HOME=\"\\\"${CUB_INCLUDE_DIR}\\\"\" ")
119+
120+
# Inherited from Torch, see
121+
# https://github.com/torch/cutorch/blob/master/lib/THC/cmake/select_compute_arch.cmake
122+
INCLUDE(cmake/select_compute_arch.cmake)
123+
CUDA_SELECT_NVCC_ARCH_FLAGS(NVCC_FLAGS_EXTRA)
124+
125+
# TODO: Investigate these
126+
set (CUDA_VERBOSE_BUILD ON)
127+
set (CUDA_PROPAGATE_HOST_FLAGS ON)
128+
129+
##############################################################################
130+
# FindCUDA doesn't find all the libraries we need, add the extra ones
131+
# Cribbed from /lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib
132+
##############################################################################
133+
find_library(CUDA_CUDA_LIBRARIES cuda
134+
PATHS ${CUDA_TOOLKIT_ROOT_DIR}
135+
PATH_SUFFIXES lib lib64 targets/x86_64-linux/lib targets/x86_64-linux/lib/stubs)
136+
137+
set(CUDA_CUDA_LIBRARY ${CUDA_CUDA_LIBRARIES})
138+
139+
# Needed to build C2 and some of our tests
140+
INCLUDE(cmake/FindCuDNN.cmake)
141+
include_directories(${CUDNN_INCLUDE_DIR})
142+
else()
143+
message(STATUS "Building TC without CUDA support")
144+
endif()
137145

138146
################################################################################
139147

@@ -143,7 +151,7 @@ if(NOT HALIDE_PREFIX)
143151
endif()
144152
message(STATUS "HALIDE_PREFIX: ${HALIDE_PREFIX}")
145153
message(STATUS "Finding Halide")
146-
find_path(HALIDE_INCLUDE_DIR NAMES Halide)
154+
find_path(HALIDE_INCLUDE_DIR NAMES Halide.h)
147155
message(STATUS "HALIDE_INCLUDE_DIRS: ${HALIDE_INCLUDE_DIR}")
148156
include_directories(${HALIDE_INCLUDE_DIR})
149157
find_library(HALIDE_LIBRARIES NAMES Halide PATHS ${CMAKE_INSTALL_PREFIX} PATH_SUFFIXES lib lib64)
@@ -166,8 +174,10 @@ if (WITH_CAFFE2)
166174
include_directories(${CAFFE2_INCLUDE_DIR})
167175
find_library(CAFFE2_CPU_LIBRARIES NAMES caffe2 PATHS ${CMAKE_INSTALL_PREFIX} PATH_SUFFIXES lib lib64)
168176
message(STATUS "Found Caffe2_CPU: ${CAFFE2_CPU_LIBRARIES}")
169-
find_library(CAFFE2_GPU_LIBRARIES NAMES caffe2_gpu PATHS ${CMAKE_INSTALL_PREFIX} PATH_SUFFIXES lib lib64)
170-
message(STATUS "Found Caffe2_GPU: ${CAFFE2_GPU_LIBRARIES}")
177+
if (WITH_CUDA)
178+
find_library(CAFFE2_GPU_LIBRARIES NAMES caffe2_gpu PATHS ${CMAKE_INSTALL_PREFIX} PATH_SUFFIXES lib lib64)
179+
message(STATUS "Found Caffe2_GPU: ${CAFFE2_GPU_LIBRARIES}")
180+
endif()
171181
else()
172182
message(STATUS "Caffe2 installation is turned off")
173183
endif()
@@ -236,12 +246,20 @@ message(STATUS "Found glog: ${GLOG_LIBRARIES}")
236246
# endforeach()
237247

238248
add_subdirectory(src)
249+
250+
# At the moment pybind is only supported in CUDA mode and compilation fails
251+
# for non-CUDA mode (CUDA_HOME and CUB_HOME undefined error).
252+
# Once the core CPU mapper is stabilized we can revisit pybind; until then,
253+
# build it only when CUDA is enabled.
254+
if (WITH_CUDA)
255+
add_subdirectory(tensor_comprehensions/pybinds)
256+
endif()
257+
239258
enable_testing()
240-
add_subdirectory(tensor_comprehensions/pybinds)
241259
add_subdirectory(test)
242260

243-
if (WITH_CAFFE2)
261+
if (WITH_CAFFE2 AND WITH_CUDA)
244262
add_subdirectory(examples)
245263
else()
246-
message(STATUS "Not building examples, caffe2 not available")
264+
message(STATUS "Not building examples, caffe2 or CUDA not available")
247265
endif()

build.sh

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,13 @@ if ! test ${CLANG_PREFIX}; then
88
exit 1
99
fi
1010

11-
ATEN_NO_CUDA=${ATEN_NO_CUDA:=0}
1211
WITH_CAFFE2=${WITH_CAFFE2:=ON}
12+
WITH_CUDA=${WITH_CUDA:=ON}
13+
if [ "${WITH_CUDA,,}" = "off" -o "${WITH_CUDA,,}" = "no" -o "${WITH_CUDA}" = "0" ]; then
14+
ATEN_NO_CUDA=1
15+
else
16+
ATEN_NO_CUDA=${ATEN_NO_CUDA:=0}
17+
fi
1318
WITH_PYTHON_C2=${WITH_PYTHON_C2:=OFF}
1419
WITH_NNPACK=${WITH_NNPACK:=OFF}
1520
PYTHON=${PYTHON:="`which python3`"}
@@ -382,6 +387,7 @@ function install_tc() {
382387
-DPROTOBUF_PROTOC_EXECUTABLE=${PROTOC} \
383388
-DCLANG_PREFIX=${CLANG_PREFIX} \
384389
-DCUDNN_ROOT_DIR=${CUDNN_ROOT_DIR} \
390+
-DWITH_CUDA=${WITH_CUDA} \
385391
-DCMAKE_C_COMPILER=${CC} \
386392
-DCMAKE_CXX_COMPILER=${CXX} .. || exit 1
387393
fi
@@ -399,14 +405,13 @@ function install_halide() {
399405
mkdir -p ${TC_DIR}/third-party/halide/build || exit 1
400406
cd ${TC_DIR}/third-party/halide/build || exit 1
401407

402-
if ! test ${USE_CONTBUILD_CACHE} || [ ! -d "${INSTALL_PREFIX}/include/Halide" ]; then
408+
if ! test ${USE_CONTBUILD_CACHE} || [ ! -e "${INSTALL_PREFIX}/include/Halide.h" ]; then
409+
LLVM_CONFIG_FROM_PREFIX=${CLANG_PREFIX}/bin/llvm-config
410+
LLVM_CONFIG=$( which $LLVM_CONFIG_FROM_PREFIX || which llvm-config-4.0 || which llvm-config )
411+
CLANG_FROM_PREFIX=${CLANG_PREFIX}/bin/clang
412+
CLANG=$( which $CLANG_FROM_PREFIX || which clang-4.0 || which clang )
403413

404414
if should_rebuild ${TC_DIR}/third-party/halide ${HALIDE_BUILD_CACHE}; then
405-
LLVM_CONFIG_FROM_PREFIX=${CLANG_PREFIX}/bin/llvm-config
406-
LLVM_CONFIG=$( which $LLVM_CONFIG_FROM_PREFIX || which llvm-config-4.0 || which llvm-config )
407-
CLANG_FROM_PREFIX=${CLANG_PREFIX}/bin/clang
408-
CLANG=$( which $CLANG_FROM_PREFIX || which clang-4.0 || which clang )
409-
410415
CLANG=${CLANG} \
411416
LLVM_CONFIG=${LLVM_CONFIG} \
412417
VERBOSE=${VERBOSE} \
@@ -416,12 +421,21 @@ function install_halide() {
416421
WITH_OPENGL= \
417422
WITH_METAL= \
418423
WITH_EXCEPTIONS=1 \
419-
make -f ../Makefile -j $CORES install || exit 1
420-
mkdir -p ${INSTALL_PREFIX}/include/Halide
421-
mv ${INSTALL_PREFIX}/include/Halide*.h ${INSTALL_PREFIX}/include/Halide/
424+
make -f ../Makefile -j $CORES || exit 1
422425
set_bcache ${TC_DIR}/third-party/halide ${HALIDE_BUILD_CACHE}
423426
fi
424427

428+
CLANG=${CLANG} \
429+
LLVM_CONFIG=${LLVM_CONFIG} \
430+
VERBOSE=${VERBOSE} \
431+
PREFIX=${INSTALL_PREFIX} \
432+
WITH_LLVM_INSIDE_SHARED_LIBHALIDE= \
433+
WITH_OPENCL= \
434+
WITH_OPENGL= \
435+
WITH_METAL= \
436+
WITH_EXCEPTIONS=1 \
437+
make -f ../Makefile -j $CORES install || exit 1
438+
425439
echo "Successfully installed Halide"
426440

427441
fi

examples/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ foreach(i ${EXAMPLES_FILES})
3333
add_test(${i} ${i})
3434
target_link_libraries(
3535
${i}
36-
tc_aten
3736
tc_autotuner
3837
tc_core
3938
tc_c2
@@ -43,5 +42,7 @@ foreach(i ${EXAMPLES_FILES})
4342
${GTEST_LIBS}
4443
${GFLAGS_LIBRARIES}
4544
${GLOG_LIBRARIES}
45+
46+
${ATEN_LIBRARIES}
4647
)
4748
endforeach()

examples/example_MLP_model.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@
2727
#include "tc/core/mapping_options.h"
2828

2929
#include "../test/test_harness.h"
30-
#include "../test/test_harness_aten.h"
30+
#include "../test/test_harness_aten_cuda.h"
3131
#include "example_fixture.h"
3232

3333
#include "tc/c2/context.h"
34-
#include "tc/core/cuda.h"
34+
#include "tc/core/cuda/cuda.h"
3535
#include "tc/core/flags.h"
3636

3737
using namespace caffe2;

examples/example_batchmatmul.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@
2727
#include "tc/core/mapping_options.h"
2828

2929
#include "../test/test_harness.h"
30-
#include "../test/test_harness_aten.h"
30+
#include "../test/test_harness_aten_cuda.h"
3131
#include "example_fixture.h"
3232

3333
#include "tc/c2/context.h"
34-
#include "tc/core/cuda.h"
34+
#include "tc/core/cuda/cuda.h"
3535
#include "tc/core/flags.h"
3636

3737
using namespace caffe2;

examples/example_fixture.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,12 @@
2929
#include "tc/aten/aten_compiler.h"
3030
#include "tc/autotuner/genetic_autotuner_aten.h"
3131
#include "tc/autotuner/utils/utils.h"
32-
#include "tc/core/compilation_cache.h"
33-
#include "tc/core/cuda.h"
32+
#include "tc/core/cuda/cuda.h"
33+
#include "tc/core/cuda/cuda_compilation_cache.h"
34+
#include "tc/core/cuda/cuda_rtc.h"
35+
#include "tc/core/cuda/cuda_tc_executor.h"
3436
#include "tc/core/flags.h"
3537
#include "tc/core/mapping_options.h"
36-
#include "tc/core/rtc.h"
3738
#include "tc/core/scope_guard.h"
3839

3940
#include <cublas_v2.h> // Must be the same as Caffe2
@@ -63,7 +64,7 @@ std::vector<const DLTensor*> inferOutputTensorInfo(
6364
const std::string& tc,
6465
const std::string& name,
6566
const std::vector<at::Tensor>& inputs) {
66-
tc::ATenCompilationUnit atCompl;
67+
tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
6768
atCompl.define(tc);
6869
return atCompl.inferOutputTensorInfo(name, inputs);
6970
}
@@ -133,7 +134,7 @@ struct Benchmark : public ::testing::Test {
133134
std::vector<at::Tensor>& outputs) {
134135
return true;
135136
}) {
136-
tc::ATenCompilationUnit atCompl;
137+
tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
137138
atCompl.define(tc);
138139
auto handle = atCompl.compile(name, inputs, mappingOptions);
139140
atCompl.run(name, inputs, outputs, handle);
@@ -281,7 +282,7 @@ struct Benchmark : public ::testing::Test {
281282
tc::CudaCache::loadCacheFromProtobuf(tc::makeCudaFilename(cacheFilename));
282283
tc::FLAGS_tuner_gen_restore_number = 1;
283284

284-
tc::ATenCompilationUnit atCompl;
285+
tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
285286
atCompl.define(tc);
286287

287288
auto mappingOptions = [&]() {
@@ -399,7 +400,7 @@ struct Benchmark : public ::testing::Test {
399400
return *options;
400401
}();
401402

402-
tc::ATenCompilationUnit atCompl;
403+
tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
403404
atCompl.define(TC);
404405
auto handle = atCompl.compile(kernelName, inputs, bestOptions);
405406
std::vector<at::Tensor> outputs;

examples/example_group_convolution.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@
2727
#include "tc/core/mapping_options.h"
2828

2929
#include "../test/test_harness.h"
30-
#include "../test/test_harness_aten.h"
30+
#include "../test/test_harness_aten_cuda.h"
3131
#include "example_fixture.h"
3232

3333
#include "tc/c2/context.h"
34-
#include "tc/core/cuda.h"
34+
#include "tc/core/cuda/cuda.h"
3535
#include "tc/core/flags.h"
3636

3737
using namespace caffe2;

examples/example_tmm.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@
2727
#include "tc/core/mapping_options.h"
2828

2929
#include "../test/test_harness.h"
30-
#include "../test/test_harness_aten.h"
30+
#include "../test/test_harness_aten_cuda.h"
3131
#include "example_fixture.h"
3232

3333
#include "tc/c2/context.h"
34-
#include "tc/core/cuda.h"
34+
#include "tc/core/cuda/cuda.h"
3535
#include "tc/core/flags.h"
3636

3737
using namespace caffe2;

0 commit comments

Comments
 (0)