Skip to content

Commit d0a5602

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents db34e6b + e6a46b0 commit d0a5602

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+2888
-915
lines changed

.github/workflows/build.yml

Lines changed: 142 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ jobs:
120120
make
121121
122122
macOS-latest-cmake:
123-
runs-on: macOS-latest
123+
runs-on: macos-latest
124124

125125
steps:
126126
- name: Clone
@@ -148,29 +148,86 @@ jobs:
148148
149149
windows-latest-cmake:
150150
runs-on: windows-latest
151+
env:
152+
OPENBLAS_VERSION: 0.3.23
153+
OPENCL_VERSION: 2023.04.17
154+
CLBLAST_VERSION: 1.5.3
151155

152156
strategy:
153157
matrix:
154158
include:
155-
- build: 'avx2'
156-
defines: ''
157-
- build: 'avx'
158-
defines: '-DLLAMA_AVX2=OFF'
159-
- build: 'avx512'
160-
defines: '-DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
159+
- build: 'avx2'
160+
defines: ''
161+
- build: 'avx'
162+
defines: '-DLLAMA_AVX2=OFF'
163+
- build: 'avx512'
164+
defines: '-DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
165+
- build: 'clblast'
166+
defines: '-DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
167+
- build: 'openblas'
168+
defines: '-DLLAMA_OPENBLAS=ON -DBLAS_LIBRARIES="/LIBPATH:$env:RUNNER_TEMP/openblas/lib" -DOPENBLAS_INC="$env:RUNNER_TEMP/openblas/include"'
161169

162170
steps:
163171
- name: Clone
164172
id: checkout
165173
uses: actions/checkout@v1
166174

175+
- name: Download OpenCL SDK
176+
id: get_opencl
177+
if: ${{ matrix.build == 'clblast' }}
178+
run: |
179+
curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip"
180+
mkdir $env:RUNNER_TEMP/opencl
181+
tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl
182+
183+
- name: Download CLBlast
184+
id: get_clblast
185+
if: ${{ matrix.build == 'clblast' }}
186+
run: |
187+
curl.exe -o $env:RUNNER_TEMP/clblast.zip -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-Windows-x64.zip"
188+
curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
189+
mkdir $env:RUNNER_TEMP/clblast
190+
tar.exe -xvf $env:RUNNER_TEMP/clblast.zip -C $env:RUNNER_TEMP/clblast
191+
foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
192+
$txt = Get-Content -Path $f -Raw
193+
$txt.Replace('C:/dependencies/opencl/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
194+
}
195+
196+
- name: Download OpenBLAS
197+
id: get_openblas
198+
if: ${{ matrix.build == 'openblas' }}
199+
run: |
200+
curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
201+
curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
202+
mkdir $env:RUNNER_TEMP/openblas
203+
tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
204+
$vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
205+
$msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
206+
$lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
207+
& $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
208+
167209
- name: Build
168210
id: cmake_build
169211
run: |
170212
mkdir build
171213
cd build
172214
cmake .. ${{ matrix.defines }}
173215
cmake --build . --config Release
216+
cp ../LICENSE ./bin/Release/llama.cpp.txt
217+
218+
- name: Add clblast.dll
219+
id: add_clblast_dll
220+
if: ${{ matrix.build == 'clblast' }}
221+
run: |
222+
cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release
223+
cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt
224+
225+
- name: Add libopenblas.dll
226+
id: add_libopenblas_dll
227+
if: ${{ matrix.build == 'openblas' }}
228+
run: |
229+
cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
230+
cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
174231
175232
- name: Check AVX512F support
176233
id: check_avx512f
@@ -187,7 +244,7 @@ jobs:
187244
188245
- name: Test
189246
id: cmake_test
190-
if: ${{ matrix.build != 'avx512' || env.HAS_AVX512F == '1' }} # Test AVX-512 only when possible
247+
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
191248
run: |
192249
cd build
193250
ctest -C Release --verbose
@@ -210,6 +267,82 @@ jobs:
210267
path: |
211268
llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip
212269
270+
windows-latest-cmake-cublas:
271+
runs-on: windows-latest
272+
273+
strategy:
274+
matrix:
275+
cuda: ['12.1.0', '11.7.1']
276+
build: ['cublas']
277+
278+
steps:
279+
- name: Clone
280+
id: checkout
281+
uses: actions/checkout@v1
282+
283+
- uses: Jimver/cuda-toolkit@v0.2.10
284+
id: cuda-toolkit
285+
with:
286+
cuda: ${{ matrix.cuda }}
287+
# TODO(green-sky): the _dev sub-packages seem to fail, and the non-dev ones are not enough
288+
#sub-packages: '["nvcc", "cudart", "cublas", "cudart_dev", "cublas_dev"]'
289+
290+
- name: Build
291+
id: cmake_build
292+
run: |
293+
mkdir build
294+
cd build
295+
cmake .. -DLLAMA_CUBLAS=ON
296+
cmake --build . --config Release
297+
298+
- name: Get commit hash
299+
id: commit
300+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
301+
uses: pr-mpt/actions-commit-hash@v2
302+
303+
- name: Pack artifacts
304+
id: pack_artifacts
305+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
306+
run: |
307+
7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
308+
309+
- name: Upload artifacts
310+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
311+
uses: actions/upload-artifact@v3
312+
with:
313+
path: |
314+
llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
315+
316+
- name: Copy and pack Cuda runtime
317+
if: ${{ matrix.cuda == '12.1.0' }}
318+
# TODO(green-sky): paths are cuda 12 specific
319+
run: |
320+
echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
321+
mkdir '.\build\bin\cudart\'
322+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_12.dll" '.\build\bin\cudart\'
323+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_12.dll" '.\build\bin\cudart\'
324+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_12.dll" '.\build\bin\cudart\'
325+
7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\*
326+
327+
- name: Copy and pack Cuda runtime
328+
if: ${{ matrix.cuda == '11.7.1' }}
329+
# TODO(green-sky): paths are cuda 11 specific
330+
run: |
331+
echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
332+
mkdir '.\build\bin\cudart\'
333+
ls "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin"
334+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_110.dll" '.\build\bin\cudart\'
335+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_11.dll" '.\build\bin\cudart\'
336+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_11.dll" '.\build\bin\cudart\'
337+
7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\*
338+
339+
- name: Upload Cuda runtime
340+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
341+
uses: actions/upload-artifact@v3
342+
with:
343+
path: |
344+
cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
345+
213346
release:
214347
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
215348

@@ -221,6 +354,7 @@ jobs:
221354
- macOS-latest-make
222355
- macOS-latest-cmake
223356
- windows-latest-cmake
357+
- windows-latest-cmake-cublas
224358

225359
steps:
226360
- name: Download artifacts

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ build-sanitize-addr/
2121
build-sanitize-thread/
2222

2323
models/*
24+
*.bin
2425

2526
/main
2627
/main-gptneox
@@ -33,10 +34,11 @@ models/*
3334
/result
3435
/perplexity
3536
/embedding
36-
/benchmark-q4_0-matmult
37+
/benchmark-matmult
3738
/vdot
3839
/Pipfile
3940

41+
build-info.h
4042
arm_neon.h
4143
compile_commands.json
4244

@@ -46,5 +48,6 @@ zig-out/
4648
zig-cache/
4749

4850
ppl-*.txt
51+
qnt-*.txt
4952

5053
examples/jeopardy/results.txt

CMakeLists.txt

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,39 @@ option(LLAMA_CLBLAST "llama: use CLBlast"
7272
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
7373
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
7474

75+
#
76+
# Build info header
77+
#
78+
79+
# Generate initial build-info.h
80+
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
81+
82+
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
83+
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.git")
84+
85+
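# If .git is a file rather than a directory (e.g. a Git submodule), read the real Git directory from its "gitdir:" line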
86+
if(NOT IS_DIRECTORY "${GIT_DIR}")
87+
file(READ ${GIT_DIR} REAL_GIT_DIR_LINK)
88+
string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK})
89+
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${REAL_GIT_DIR}")
90+
endif()
91+
92+
# Add a custom target for build-info.h
93+
add_custom_target(BUILD_INFO ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h")
94+
95+
# Add a custom command to rebuild build-info.h when .git/index changes
96+
add_custom_command(
97+
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h"
98+
COMMENT "Generating build details from Git"
99+
COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake"
100+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
101+
DEPENDS "${GIT_DIR}/index"
102+
VERBATIM
103+
)
104+
else()
105+
message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
106+
endif()
107+
75108
#
76109
# Compile flags
77110
#
@@ -258,9 +291,22 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
258291
# TODO: arm msvc?
259292
else()
260293
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
294+
# Apple M1, M2, etc.
295+
# Raspberry Pi 3, 4, Zero 2 (64-bit)
261296
add_compile_options(-mcpu=native)
262297
endif()
263-
# TODO: armv6,7,8 version specific flags
298+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
299+
# Raspberry Pi 1, Zero
300+
add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)
301+
endif()
302+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
303+
# Raspberry Pi 2
304+
add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations)
305+
endif()
306+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
307+
# Raspberry Pi 3, 4, Zero 2 (32-bit)
308+
add_compile_options(-mfp16-format=ieee -mno-unaligned-access)
309+
endif()
264310
endif()
265311
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
266312
message(STATUS "x86 detected")
@@ -311,8 +357,11 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
311357
add_compile_options(-mavx512vnni)
312358
endif()
313359
endif()
360+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
361+
message(STATUS "PowerPC detected")
362+
add_compile_options(-mcpu=native -mtune=native)
363+
# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10 (MMA), and query for big-endian systems (ppc64/le/be)
314364
else()
315-
# TODO: support PowerPC
316365
message(STATUS "Unknown architecture")
317366
endif()
318367

0 commit comments

Comments
 (0)