Skip to content

CMakeLists refinements for CUBLAS and phthread #57

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 41 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
set(THREADS_PREFER_PTHREAD_FLAG ON)
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang")
# Note: it must be performed before the Threads package finding (CMAKE_HAVE_LIBC_PTHREAD testing)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
endif()
find_package(Threads REQUIRED)

if (NOT MSVC)
Expand Down Expand Up @@ -258,6 +262,7 @@ if (LLAMA_CUBLAS)

else()
message(WARNING "cuBLAS not found")
set(LLAMA_CUBLAS OFF CACHE BOOL "LLAMA_CUBLAS set to OFF because cuBLAS is not found" FORCE)
endif()
endif()

Expand Down Expand Up @@ -422,7 +427,8 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
endif()
else()
elseif(NOT ${CUDAToolkit_FOUND})
# Note: these flags should be applied to the CPU architecture, but they are passed to nvcc when CUDAToolkit is used
if (LLAMA_F16C)
add_compile_options(-mf16c)
endif()
Expand Down Expand Up @@ -470,6 +476,16 @@ add_library(ggml OBJECT
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
target_compile_features(ggml PUBLIC c_std_11) # don't bump
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
if(GGML_USE_CUBLAS)
# Set the minimum standard, CMake can still decide to use -std=c++17
# if the given compilers support C++17
target_compile_features(ggml PUBLIC cuda_std_11)
set_target_properties(ggml PROPERTIES
CUDA_RUNTIME_LIBRARY SHARED
# CUDA_STANDARD 11 # This one cannot be changed by CMake
# CUDA_SEPARABLE_COMPILATION ON # Optional
)
endif()

add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)
if (BUILD_SHARED_LIBS)
Expand All @@ -485,6 +501,16 @@ add_library(llama

target_include_directories(llama PUBLIC .)
target_compile_features(llama PUBLIC cxx_std_11) # don't bump
if(LLAMA_CUBLAS)
# Set the minimum standard, CMake can still decide to use -std=c++17
# if the given compilers support C++17
target_compile_features(llama PUBLIC cuda_std_11)
set_target_properties(llama PROPERTIES
CUDA_RUNTIME_LIBRARY SHARED
# CUDA_STANDARD 11 # This one cannot be changed by CMake
# CUDA_SEPARABLE_COMPILATION ON # Optional
)
endif()
target_link_libraries(llama PRIVATE
ggml
${LLAMA_EXTRA_LIBS}
Expand Down Expand Up @@ -527,12 +553,21 @@ endif()

if (GGML_SOURCES_CUDA)
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
get_target_property(TARG_CUDA_ARCH ggml CUDA_ARCHITECTURES)
if ((NOT ${TARG_CUDA_ARCH}) OR "${TARG_CUDA_ARCH}" STREQUAL "TARG_CUDA_ARCH-NOTFOUND")
if (NOT ${CUDA_ARCHITECTURES})
set(TARG_CUDA_ARCH all) # all or OFF
message(WARNING "CUDA_ARCHITECTURES are not defined for the targets, setting them to ${TARG_CUDA_ARCH}")
else()
set(TARG_CUDA_ARCH ${CUDA_ARCHITECTURES})
message(STATUS "Setting CUDA_ARCHITECTURES to ${TARG_CUDA_ARCH}")
endif()
set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES ${TARG_CUDA_ARCH})
set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES ${TARG_CUDA_ARCH})
# falcon
set_property(TARGET falcon PROPERTY CUDA_ARCHITECTURES ${TARG_CUDA_ARCH})
endif()
set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF)
# falcon
set_property(TARGET falcon PROPERTY CUDA_ARCHITECTURES OFF)

endif()


Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ The speed can be seen at 35 tokens/sec start gradually lowering over context - t
#Choose a current distro:
wsl.exe --list --online
wsl --install -d distro
# cmake 3.16 is required and the cuda toolset
# cmake 3.17 is required and the cuda toolset
# If you run an old distro you can upgrade (like apt update; apt upgrade; apt full-upgrade; pico /etc/apt/sources.list/; apt update; apt upgrade; apt full-upgrade; apt autoremove; lsb_release -a); then wsl --shutdown and restart it
# install cuda WSL toolkit
wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.0-1_all.deb
Expand Down