Skip to content

Add workflow for tensorcore CUDA wheels #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .github/workflows/build-wheels-prioritized-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,17 @@ jobs:
with:
version: ${{ inputs.version }}
exclude: 'pyver:3.10,cuda:12.1.1'

# Calls the reusable tensor core wheel workflow once build_textgen_wheels has
# finished, forwarding the requested llama-cpp-python version tag.
build_textgen_tensorcores:
  name: Tensor Core CUDA wheels
  needs: build_textgen_wheels
  uses: ./.github/workflows/build-wheels-tensorcores.yml
  with:
    version: ${{ inputs.version }}

# CPU-only wheel build, delegated to the reusable build-wheels-cpu workflow.
# `needs` is declared exactly once: a mapping may not repeat a key, and most
# YAML loaders would silently keep only the last value. This job waits on
# build_textgen_tensorcores, which in turn needs build_textgen_wheels, so it
# still runs after both.
build_wheels_cpu:
  name: CPU-only Wheels
  needs: build_textgen_tensorcores
  uses: ./.github/workflows/build-wheels-cpu.yml
  with:
    version: ${{ inputs.version }}
Expand Down
230 changes: 230 additions & 0 deletions .github/workflows/build-wheels-tensorcores.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
# Builds cuBLAS-enabled wheels of abetlen/llama-cpp-python, repackaged under
# the name llama_cpp_python_cuda_tensorcores, and uploads them to a release.
name: Build tensor core wheels for TGW

# Triggers: a manual run (workflow_dispatch) or a call from another workflow
# (workflow_call). Both declare the same three string inputs.
on:
  workflow_dispatch:
    inputs:
      # Tag of llama-cpp-python to check out and build.
      version:
        description: 'Version tag of llama-cpp-python to build: v0.2.14'
        type: string
        required: true
        default: 'v0.2.14'
      # 'Default' keeps the built-in matrix; anything else overrides keys.
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        type: string
        required: false
        default: 'Default'
      # 'None' disables exclusions; otherwise a list of matrix excludes.
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        type: string
        required: false
        default: 'None'
  workflow_call:
    inputs:
      # Tag of llama-cpp-python to check out and build.
      version:
        description: 'Version tag of llama-cpp-python to build: v0.2.14'
        type: string
        required: true
        default: 'v0.2.14'
      # 'Default' keeps the built-in matrix; anything else overrides keys.
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        type: string
        required: false
        default: 'Default'
      # 'None' disables exclusions; otherwise a list of matrix excludes.
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        type: string
        required: false
        default: 'None'

# Grants the workflow token write access to repository contents.
# NOTE(review): presumably required by the release upload step - confirm.
permissions:
  contents: write

jobs:
  # Produces the build matrix as a compact JSON string so that it can be
  # adjusted at run time through the `config` and `exclude` inputs.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      # Inputs are handed to the script through the environment rather than
      # being interpolated into the script text.
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}

    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          # Built-in matrix used when no override is supplied.
          $matrix = @{
              'os' = 'ubuntu-20.04', 'windows-latest'
              'pyver' = "3.11", "3.10", "3.9", "3.8"
              'cuda' = "11.7.1", "11.8.0", "12.0.1", "12.1.1", "12.2.0"
              'avxver' = "AVX2", "AVX", "basic"
          }

          # 'key:a,b;key2:c' replaces the listed matrix keys with the given item lists.
          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          # Each ';'-separated group becomes one entry of the matrix 'exclude' list.
          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  # Builds one wheel per matrix combination (OS, Python, CUDA, AVX level) and
  # publishes it to the 'textgen-webui' release, falling back to a workflow
  # artifact when the release upload fails.
  build_wheels:
    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.avxver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      CUDAVER: ${{ matrix.cuda }}
      AVXVER: ${{ matrix.avxver }}
      PCKGVER: ${{ inputs.version }}

    steps:
      # Source comes from the upstream project at the requested tag, not from
      # the repository that hosts this workflow.
      - uses: actions/checkout@v4
        with:
          repository: 'abetlen/llama-cpp-python'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}

      # Conda environment named "build"; later steps use $env:CONDA_PREFIX as
      # the CUDA install location.
      - name: Setup Mamba
        uses: conda-incubator/setup-miniconda@v2.2.0
        with:
          activate-environment: "build"
          python-version: ${{ matrix.pyver }}
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          add-pip-as-python-dependency: true
          auto-activate-base: false

      # Windows only: cache the extracted MSBuild extensions per CUDA version.
      - name: VS Integration Cache
        id: vs-integration-cache
        if: runner.os == 'Windows'
        uses: actions/cache@v3.3.2
        with:
          path: ./MSBuildExtensions
          key: cuda-${{ matrix.cuda }}-vs-integration

      - name: Get Visual Studio Integration
        if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
        run: |
          # CUDA 12.1.1 is looked up under 12.1.0 in the link table.
          if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
          # Fetch the Windows installer link table and split it into whitespace-separated tokens.
          $links = (Invoke-RestMethod 'https://github.com/Jimver/cuda-toolkit/raw/257a101bc5c656053b5dc220126744980ef7f5b8/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
          # Stop just after the second token equal to the quoted version; $links[$i] is then used as the URL.
          for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
          Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
          # Extract only the MSBuildExtensions files from the installer archive.
          & 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
          Remove-Item 'cudainstaller.zip'

      - name: Install Visual Studio Integration
        if: runner.os == 'Windows'
        run: |
          # Copy the extensions into every BuildCustomizations directory of VS 2022 Enterprise.
          $y = (gi '.\MSBuildExtensions').fullname + '\*'
          (gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
          # Export CUDA_PATH_V<major>_<minor> pointing at the conda environment for later steps.
          $cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
          echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV

      - name: Install Dependencies
        run: |
          $cudaVersion = $env:CUDAVER
          $cudaChannels = ''
          # Add one nvidia/label/cuda-X.Y.Z channel per patch level, from the requested patch down to 0.
          $cudaNum = [int]$cudaVersion.substring($cudaVersion.LastIndexOf('.')+1)
          while ($cudaNum -ge 0) { $cudaChannels += '-c nvidia/label/cuda-' + $cudaVersion.Remove($cudaVersion.LastIndexOf('.')+1) + $cudaNum + ' '; $cudaNum-- }
          mamba install -y 'cuda' $cudaChannels.TrimEnd().Split()
          # Retry once after 10 seconds if the package list does not show cuda, then give up.
          if (!(mamba list cuda)[-1].contains('cuda')) {sleep -s 10; mamba install -y 'cuda' $cudaChannels.TrimEnd().Split()}
          if (!(mamba list cuda)[-1].contains('cuda')) {throw 'CUDA Toolkit failed to install!'}
          python -m pip install build wheel

      # Renames the distribution to llama_cpp_python_cuda_tensorcores and the
      # importable package to llama_cpp_cuda_tensorcores.
      - name: Change Package Name
        run: |
          $packageVersion = [version]$env:PCKGVER.TrimStart('v')
          $pyproject = Get-Content 'pyproject.toml' -raw
          $cmakelists = Get-Content 'CMakeLists.txt' -raw
          if ($packageVersion -lt [version]'0.2.0') {
              # Versions before 0.2.0: rewrite both setup.py and pyproject.toml.
              $setup = Get-Content 'setup.py' -raw
              $regexstr = '(?s)name="llama_cpp_python",(.+)(package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"},.+?packages=\["llama_cpp", "llama_cpp.server"],)'
              if ($packageVersion -gt [version]'0.1.77') {$regexstr = '(?s)name="llama_cpp_python",(.+)(package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"},.+?package_data={"llama_cpp": \["py.typed"]},.+?packages=\["llama_cpp", "llama_cpp.server"],)'}
              $regexmatch = [Regex]::Matches($setup,$regexstr)
              if (!($regexmatch[0].Success)) {throw 'setup.py parsing failed'}
              $newstr = 'name="llama_cpp_python_cuda_tensorcores",' + $regexmatch[0].Groups[1].Value + $regexmatch[0].Groups[2].Value.Replace('llama_cpp','llama_cpp_cuda_tensorcores')
              $newsetup = $regexmatch[0].Result(('$`'+$newstr+'$'''))
              New-Item 'setup.py' -itemType File -value $newsetup -force
              $regexstr = '(?s)(?<=name = ")llama_cpp_python(".+?packages = \[{include = ")llama_cpp(".+)'
              $regexmatch = [Regex]::Matches($pyproject,$regexstr)
              if (!($regexmatch[0].Success)) {throw 'pyproject.toml parsing failed'}
              $newpyproject = $regexmatch[0].Result(('$`'+'llama_cpp_python_cuda_tensorcores'+'$1llama_cpp_cuda_tensorcores$2'))
          } else {
              # 0.2.0 and later: only pyproject.toml carries the package names.
              $regexstr = '(?s)(?<=\[project]\s+?name = ")llama_cpp_python(".+?all = \[\s+?")llama_cpp_python(\[.+?wheel.packages = \[")llama_cpp("].+?input = ")llama_cpp(?=/__init__.py")'
              $regexmatch = [Regex]::Matches($pyproject,$regexstr)
              if (!($regexmatch[0].Success)) {throw 'pyproject.toml parsing failed'}
              $newpyproject = $regexmatch[0].Result(('$`' + 'llama_cpp_python_cuda_tensorcores' + '$1llama_cpp_cuda_tensorcores$2' + 'llama_cpp_cuda_tensorcores$3llama_cpp_cuda_tensorcores' + '$'''))
          }
          # Duplicate the package directory under the new name and write the rewritten build files.
          Copy-Item 'llama_cpp' 'llama_cpp_cuda_tensorcores' -recurse
          New-Item 'pyproject.toml' -itemType File -value $newpyproject -force
          New-Item 'CMakeLists.txt' -itemType File -value $cmakelists.Replace('llama_cpp','llama_cpp_cuda_tensorcores') -force

          # After 0.2.13: replace 'import llama_cpp.llama' with a relative import in the copied package's .py files.
          if ($packageVersion -gt [version]'0.2.13')
          {
              $pyScripts = (Get-ChildItem $(Join-Path '.' 'llama_cpp_cuda_tensorcores' '*.py'))
              $pyScripts.fullname.foreach({New-Item $_ -itemType File -value (Get-Content $_ -raw).replace('import llama_cpp.llama','from . import llama') -force})
          }

      - name: Build Wheel
        run: |
          $packageVersion = [version]$env:PCKGVER.TrimStart('v')
          # Drop the patch component and the dots from the CUDA version, e.g. 12.1.1 -> 121.
          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
          # Point the build at the CUDA toolkit installed into the conda environment.
          $env:CUDA_PATH = $env:CONDA_PREFIX
          $env:CUDA_HOME = $env:CONDA_PREFIX
          if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
          $env:VERBOSE = '1'
          $env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
          # Package versions above 0.1.68 and below 0.1.71 get an explicit architecture list, narrowed by CUDA version.
          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.71') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=35-real;37-real;52;61-real;70-real;72-real;75-real;80-real;86-real;89-real;90'}
          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.71' -and [version]$env:CUDAVER -ge [version]'12.0') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=52;61-real;70-real;72-real;75-real;80-real;86-real;89-real;90'}
          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.71' -and [version]$env:CUDAVER -lt [version]'11.8') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=35-real;37-real;52;61-real;70-real;72-real;75-real;80-real;86'}
          if ($packageVersion -gt [version]'0.2.13') {$env:CMAKE_ARGS = "-DLLAMA_NATIVE=off $env:CMAKE_ARGS"}
          if ($packageVersion -lt [version]'0.1.66') {$env:CUDAFLAGS = '-arch=all'}
          # AVX and basic builds switch off the newer instruction sets and add a CPU tag; AVX2 adds nothing.
          if ($env:AVXVER -eq 'AVX') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'; $cputag = 'avx'}
          if ($env:AVXVER -eq 'basic') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'; $cputag = 'basic'}
          # Version suffix of the wheel: +cu<cuda><cputag>.
          $buildtag = "+cu$cudaVersion$cputag"
          if ($packageVersion -lt [version]'0.2.0') {
              # Versions before 0.2.0: pass the tag to the build as a --tag-build option.
              $env:FORCE_CMAKE = '1'
              python -m build --wheel -C--build-option=egg_info "-C--build-option=--tag-build=$buildtag"
          } else {
              # 0.2.0 and later: append the tag to __version__ in the package's __init__.py before building.
              $initpath = Join-Path '.' 'llama_cpp_cuda_tensorcores' '__init__.py' -resolve
              $initcontent = Get-Content $initpath -raw
              $regexstr = '(?s)(?<=__version__ \= ")\d+(?:\.\d+)*(?=")'
              $regexmatch = [Regex]::Matches($initcontent,$regexstr)
              if (!($regexmatch[0].Success)) {throw '__init__.py parsing failed'}
              $newinit = $regexmatch[0].Result(('$`' + '$&' + $buildtag + '$'''))
              New-Item $initpath -itemType File -value $newinit -force
              python -m build --wheel
          }

      # A failed release upload does not fail the job; the next step checks
      # this step's outcome instead.
      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          file: ./dist/*.whl
          tag: textgen-webui
          file_glob: true
          make_latest: false
          overwrite: true

      # Fallback: keep the wheels as a workflow artifact when the release
      # upload failed.
      - uses: actions/upload-artifact@v3
        if: steps.upload-release.outcome == 'failure'
        with:
          name: textgen-webui-wheels
          path: ./dist/*.whl