From fc287aa21faa500119ced20ec4eba693bdf8c88e Mon Sep 17 00:00:00 2001 From: Artem Date: Tue, 11 Jul 2023 15:19:22 +0300 Subject: [PATCH] pthread linking fixed on Linux, CUDA_ARCHITECTURES set to all for CUBLAS when not defined explicitly, cuda_std_11 enabled on CUBLAS --- CMakeLists.txt | 47 +++++++++++++++++++++++++++++++++++++++++------ README.md | 2 +- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a5315c052f79..507d9e1181223 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -130,6 +130,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED true) set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED true) set(THREADS_PREFER_PTHREAD_FLAG ON) +if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang") + # Note: it must be performed before the Threads package finding (CMAKE_HAVE_LIBC_PTHREAD testing) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread") +endif() find_package(Threads REQUIRED) if (NOT MSVC) @@ -258,6 +262,7 @@ if (LLAMA_CUBLAS) else() message(WARNING "cuBLAS not found") + set(LLAMA_CUBLAS OFF CACHE BOOL "LLAMA_CUBLAS set to OFF because cuBLAS is not found" FORCE) endif() endif() @@ -422,7 +427,8 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>) add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>) endif() - else() + elseif(NOT ${CUDAToolkit_FOUND}) + # Note: these flags should be applied to the CPU architecture, but they are passed to nvcc when CUDAToolkit is used if (LLAMA_F16C) add_compile_options(-mf16c) endif() @@ -470,6 +476,16 @@ add_library(ggml OBJECT target_include_directories(ggml PUBLIC . 
${LLAMA_EXTRA_INCLUDES}) target_compile_features(ggml PUBLIC c_std_11) # don't bump target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) +if(GGML_USE_CUBLAS) + # Set the minimum standard, CMake can still decide to use -std=c++17 + # if the given compilers support C++17 + target_compile_features(ggml PUBLIC cuda_std_11) + set_target_properties(ggml PROPERTIES + CUDA_RUNTIME_LIBRARY SHARED + # CUDA_STANDARD 11 # This one cannot be changed by CMake + # CUDA_SEPARABLE_COMPILATION ON # Optional + ) +endif() add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>) if (BUILD_SHARED_LIBS) @@ -485,6 +501,16 @@ add_library(llama target_include_directories(llama PUBLIC .) target_compile_features(llama PUBLIC cxx_std_11) # don't bump +if(LLAMA_CUBLAS) + # Set the minimum standard, CMake can still decide to use -std=c++17 + # if the given compilers support C++17 + target_compile_features(llama PUBLIC cuda_std_11) + set_target_properties(llama PROPERTIES + CUDA_RUNTIME_LIBRARY SHARED + # CUDA_STANDARD 11 # This one cannot be changed by CMake + # CUDA_SEPARABLE_COMPILATION ON # Optional + ) +endif() target_link_libraries(llama PRIVATE ggml ${LLAMA_EXTRA_LIBS} @@ -527,12 +553,21 @@ endif() if (GGML_SOURCES_CUDA) message(STATUS "GGML CUDA sources found, configuring CUDA architecture") - set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF) + get_target_property(TARG_CUDA_ARCH ggml CUDA_ARCHITECTURES) + if ((NOT ${TARG_CUDA_ARCH}) OR "${TARG_CUDA_ARCH}" STREQUAL "TARG_CUDA_ARCH-NOTFOUND") + if (NOT ${CUDA_ARCHITECTURES}) + set(TARG_CUDA_ARCH all) # all or OFF + message(WARNING "CUDA_ARCHITECTURES are not defined for the targets, setting them to ${TARG_CUDA_ARCH}") + else() + set(TARG_CUDA_ARCH ${CUDA_ARCHITECTURES}) + message(STATUS "Setting CUDA_ARCHITECTURES to ${TARG_CUDA_ARCH}") + endif() + set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES ${TARG_CUDA_ARCH}) + set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES ${TARG_CUDA_ARCH}) + # falcon + set_property(TARGET falcon 
PROPERTY CUDA_ARCHITECTURES ${TARG_CUDA_ARCH}) + endif() set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto") - set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF) - # falcon - set_property(TARGET falcon PROPERTY CUDA_ARCHITECTURES OFF) - endif() diff --git a/README.md b/README.md index cc660947e1fb8..e31dcc34da1d1 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ The speed can be seen at 35 tokens/sec start gradually lowering over context - t #Choose a current distro: wsl.exe --list --online wsl --install -d distro -# cmake 3.16 is required and the cuda toolset +# cmake 3.17 is required and the cuda toolset # If you run an old distro you can upgrade (like apt update; apt upgrade; apt full-upgrade; pico /etc/apt/sources.list/; apt update; apt upgrade; apt full-upgrade; apt autoremove; lsb_release -a); then wsl --shutdown and restart it # install cuda WSL toolkit wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.0-1_all.deb