|
# Ensure the compiler is a valid clang when building the GPU target.
#
# The check only runs when LLVM_VERSION_MAJOR is set. In that case the C++
# compiler ID has to match "[Cc]lang" and the compiler version has to equal
# the major.minor.patch string assembled from the LLVM_VERSION_* variables;
# otherwise configuration stops with a fatal error.
set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
# CMAKE_CXX_COMPILER_VERSION is expanded inside quotes on purpose: if it is
# empty, an unquoted expansion would leave VERSION_EQUAL without a left-hand
# operand and if() would fail to parse instead of reporting the error below.
if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND
   "${CMAKE_CXX_COMPILER_VERSION}" VERSION_EQUAL "${req_ver}"))
  message(FATAL_ERROR "Cannot build GPU device runtime. CMake compiler "
                      "'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' "
                      " is not 'Clang ${req_ver}'.")
endif()
| 9 | + |
# Source files of the device runtime. They are listed by base name and then
# expanded in place to "${CMAKE_CURRENT_SOURCE_DIR}/src/<name>.cpp"; the order
# of the entries is preserved.
set(src_files
  Allocator
  Configuration
  Debug
  Kernel
  LibC
  Mapping
  Misc
  Parallelism
  Profiling
  Reduction
  State
  Synchronization
  Tasking
  DeviceUtils
  Workshare
)
list(TRANSFORM src_files PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/src/")
list(TRANSFORM src_files APPEND ".cpp")
| 27 | + |
# Compiler flags collected in compile_options, appended in a single call.
list(APPEND compile_options
  -flto
  -fvisibility=hidden
  -nogpulib
  -nostdlibinc
  -fno-rtti
  -fno-exceptions
  -fconvergent-functions
  -Wno-unknown-cuda-version
)
# Forward the default target triple to the compiler when one is configured.
if(LLVM_DEFAULT_TARGET_TRIPLE)
  list(APPEND compile_options "--target=${LLVM_DEFAULT_TARGET_TRIPLE}")
endif()
| 39 | + |
# We disable the SLP vectorizer during the runtime optimization to avoid
# vectorized accesses to the shared state. Generally, those are "good" but
# the optimizer pipeline (esp. Attributor) does not fully support vectorized
# instructions yet and we end up missing out on way more important constant
# propagation. That said, we will run the vectorizer again after the runtime
# has been linked into the user program.
#
# These flags are appended to compile_options, the list that is handed to
# target_compile_options() for the device runtime. Appending them to a
# separate, otherwise unused list would silently drop them from the build.
# The "SHELL:" prefix keeps each flag/argument pair together as one option.
list(APPEND compile_options "SHELL:-mllvm -vectorize-slp=false")

# Select the name used for the output file and add the target-specific flags,
# based on whether the default target triple or the C++ compiler target
# starts with "amdgcn" or "nvptx".
# NOTE(review): target_name is not set here when neither prefix matches --
# confirm that this file is only processed for these two targets.
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR
   "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn")
  set(target_name "amdgpu")
  list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none")
elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" OR
       "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
  set(target_name "nvptx")
  list(APPEND compile_options --cuda-feature=+ptx63)
endif()
| 56 | + |
# Trick: the device runtime is declared as an "executable" so that the
# linker's LTO pass combines all sources into a single bitcode file. The
# output is written to the current binary directory under the name
# libomptarget-${target_name}.bc, with empty build and install RPATHs.
add_executable(libompdevice ${src_files})
set_target_properties(libompdevice PROPERTIES
  LINKER_LANGUAGE CXX
  RUNTIME_OUTPUT_NAME "libomptarget-${target_name}.bc"
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
  BUILD_RPATH ""
  INSTALL_RPATH "")
| 65 | + |
# Preprocessor definitions for the device runtime:
#  - OMPTARGET_HAS_LIBC is defined only when LIBOMPTARGET_GPU_LIBC_SUPPORT
#    evaluates to true, i.e. the user built with the GPU C library enabled,
#    in which case that library is used instead.
#  - SHARED_SCRATCHPAD_SIZE is always defined as 512.
target_compile_definitions(libompdevice PRIVATE
  $<$<BOOL:${LIBOMPTARGET_GPU_LIBC_SUPPORT}>:OMPTARGET_HAS_LIBC>
  SHARED_SCRATCHPAD_SIZE=512)
| 71 | + |
# Header search paths: the local include/ directory plus the libc and
# offload/include directories two levels above this one.
target_include_directories(libompdevice PRIVATE
  "${CMAKE_CURRENT_SOURCE_DIR}/include"
  "${CMAKE_CURRENT_SOURCE_DIR}/../../libc"
  "${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include")

# Apply the compiler flags gathered in compile_options.
target_compile_options(libompdevice PRIVATE ${compile_options})

# Link flags for the LTO-based bitcode "link". The --target flag is added
# only when LLVM_DEFAULT_TARGET_TRIPLE evaluates to true.
target_link_options(libompdevice PRIVATE
  -flto
  -r
  -nostdlib
  -Wl,--lto-emit-llvm
  $<$<BOOL:${LLVM_DEFAULT_TARGET_TRIPLE}>:--target=${LLVM_DEFAULT_TARGET_TRIPLE}>)
# Install the libompdevice output into OPENMP_INSTALL_LIBDIR. The explicit
# permission list grants read access to owner, group and world and write
# access to the owner; it contains no execute bits.
install(TARGETS libompdevice
  DESTINATION ${OPENMP_INSTALL_LIBDIR}
  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ)
| 85 | + |
# Imported object library whose only object is the bitcode file
# libomptarget-${target_name}.bc in the current binary directory.
add_library(ompdevice.all_objs OBJECT IMPORTED)
set_target_properties(ompdevice.all_objs PROPERTIES
  IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/libomptarget-${target_name}.bc")
| 89 | + |
# Static library "ompdevice": an archive built from the objects of
# ompdevice.all_objs, written to and installed into OPENMP_INSTALL_LIBDIR.
add_library(ompdevice STATIC)
set_target_properties(ompdevice PROPERTIES
  LINKER_LANGUAGE CXX
  ARCHIVE_OUTPUT_NAME ompdevice
  ARCHIVE_OUTPUT_DIRECTORY "${OPENMP_INSTALL_LIBDIR}"
)
# Make sure libompdevice is built before the archive is created.
add_dependencies(ompdevice libompdevice)
target_link_libraries(ompdevice PRIVATE ompdevice.all_objs)
install(TARGETS ompdevice ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")
0 commit comments