From 5d1f2cfe2e5dfc3b5d644bea98c89551ebe0ca02 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 3 Jan 2024 19:48:54 -0800
Subject: [PATCH 1/2] Add workflow for tensorcores wheels

---
Review notes (text between the three-dash line and the diff is ignored by git-am):
* Chains a new build_textgen_tensorcores job into the prioritized release
  workflow and makes build_wheels_cpu wait on it.
* Adds build-wheels-tensorcores.yml, which checks out abetlen/llama-cpp-python
  at the requested version, renames the package to
  llama_cpp_python_cuda_tensorcores, builds one wheel per os/pyver/cuda/avxver
  matrix entry, and uploads it to the textgen-webui release tag (falling back
  to an artifact upload when the release upload step fails).
* The new job's "needs: build_wheels_main" is replaced by
  "needs: build_textgen_wheels" in patch 2/2.

 .../build-wheels-prioritized-release.yml      |   9 +-
 .../workflows/build-wheels-tensorcores.yml    | 230 ++++++++++++++++++
 2 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/build-wheels-tensorcores.yml

diff --git a/.github/workflows/build-wheels-prioritized-release.yml b/.github/workflows/build-wheels-prioritized-release.yml
index 10603683..069ce7cc 100644
--- a/.github/workflows/build-wheels-prioritized-release.yml
+++ b/.github/workflows/build-wheels-prioritized-release.yml
@@ -66,10 +66,17 @@ jobs:
     with:
       version: ${{ inputs.version }}
       exclude: 'pyver:3.10,cuda:12.1.1'
+
+  build_textgen_tensorcores:
+    name: Tensor Core CUDA wheels
+    needs: build_wheels_main
+    uses: ./.github/workflows/build-wheels-tensorcores.yml
+    with:
+      version: ${{ inputs.version }}
 
   build_wheels_cpu:
     name: CPU-only Wheels
-    needs: build_textgen_wheels
+    needs: build_textgen_tensorcores
     uses: ./.github/workflows/build-wheels-cpu.yml
     with:
       version: ${{ inputs.version }}
diff --git a/.github/workflows/build-wheels-tensorcores.yml b/.github/workflows/build-wheels-tensorcores.yml
new file mode 100644
index 00000000..4e566df3
--- /dev/null
+++ b/.github/workflows/build-wheels-tensorcores.yml
@@ -0,0 +1,230 @@
+name: Build tensor core wheels for TGW
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version tag of llama-cpp-python to build: v0.2.14'
+        default: 'v0.2.14'
+        required: true
+        type: string
+      config:
+        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
+        default: 'Default'
+        required: false
+        type: string
+      exclude:
+        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
+        default: 'None'
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      version:
+        description: 'Version tag of llama-cpp-python to build: v0.2.14'
+        default: 'v0.2.14'
+        required: true
+        type: string
+      config:
+        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
+        default: 'Default'
+        required: false
+        type: string
+      exclude:
+        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
+        default: 'None'
+        required: false
+        type: string
+
+permissions:
+  contents: write
+
+jobs:
+  define_matrix:
+    name: Define Build Matrix
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      CONFIGIN: ${{ inputs.config }}
+      EXCLUDEIN: ${{ inputs.exclude }}
+
+    steps:
+      - name: Define Job Output
+        id: set-matrix
+        run: |
+          $matrix = @{
+              'os' = 'ubuntu-20.04', 'windows-latest'
+              'pyver' = "3.11", "3.10", "3.9", "3.8"
+              'cuda' = "11.7.1", "11.8.0", "12.0.1", "12.1.1", "12.2.0"
+              'avxver' = "AVX2", "AVX", "basic"
+          }
+
+          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}
+
+          if ($env:EXCLUDEIN -ne 'None') {
+              $exclusions = @()
+              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
+              $matrix['exclude'] = $exclusions
+          }
+
+          $matrixOut = ConvertTo-Json $matrix -Compress
+          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
+
+  build_wheels:
+    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.avxver }}
+    needs: define_matrix
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      CUDAVER: ${{ matrix.cuda }}
+      AVXVER: ${{ matrix.avxver }}
+      PCKGVER: ${{ inputs.version }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          repository: 'abetlen/llama-cpp-python'
+          ref: ${{ inputs.version }}
+          submodules: 'recursive'
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.pyver }}
+
+      - name: Setup Mamba
+        uses: conda-incubator/setup-miniconda@v2.2.0
+        with:
+          activate-environment: "build"
+          python-version: ${{ matrix.pyver }}
+          miniforge-variant: Mambaforge
+          miniforge-version: latest
+          use-mamba: true
+          add-pip-as-python-dependency: true
+          auto-activate-base: false
+
+      - name: VS Integration Cache
+        id: vs-integration-cache
+        if: runner.os == 'Windows'
+        uses: actions/cache@v3.3.2
+        with:
+          path: ./MSBuildExtensions
+          key: cuda-${{ matrix.cuda }}-vs-integration
+
+      - name: Get Visual Studio Integration
+        if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
+        run: |
+          if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
+          $links = (Invoke-RestMethod 'https://github.com/Jimver/cuda-toolkit/raw/257a101bc5c656053b5dc220126744980ef7f5b8/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
+          for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
+          Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
+          & 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
+          Remove-Item 'cudainstaller.zip'
+
+      - name: Install Visual Studio Integration
+        if: runner.os == 'Windows'
+        run: |
+          $y = (gi '.\MSBuildExtensions').fullname + '\*'
+          (gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
+          $cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
+          echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
+
+      - name: Install Dependencies
+        run: |
+          $cudaVersion = $env:CUDAVER
+          $cudaChannels = ''
+          $cudaNum = [int]$cudaVersion.substring($cudaVersion.LastIndexOf('.')+1)
+          while ($cudaNum -ge 0) { $cudaChannels += '-c nvidia/label/cuda-' + $cudaVersion.Remove($cudaVersion.LastIndexOf('.')+1) + $cudaNum + ' '; $cudaNum-- }
+          mamba install -y 'cuda' $cudaChannels.TrimEnd().Split()
+          if (!(mamba list cuda)[-1].contains('cuda')) {sleep -s 10; mamba install -y 'cuda' $cudaChannels.TrimEnd().Split()}
+          if (!(mamba list cuda)[-1].contains('cuda')) {throw 'CUDA Toolkit failed to install!'}
+          python -m pip install build wheel
+
+      - name: Change Package Name
+        run: |
+          $packageVersion = [version]$env:PCKGVER.TrimStart('v')
+          $pyproject = Get-Content 'pyproject.toml' -raw
+          $cmakelists = Get-Content 'CMakeLists.txt' -raw
+          if ($packageVersion -lt [version]'0.2.0') {
+            $setup = Get-Content 'setup.py' -raw
+            $regexstr = '(?s)name="llama_cpp_python",(.+)(package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"},.+?packages=\["llama_cpp", "llama_cpp.server"],)'
+            if ($packageVersion -gt [version]'0.1.77') {$regexstr = '(?s)name="llama_cpp_python",(.+)(package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"},.+?package_data={"llama_cpp": \["py.typed"]},.+?packages=\["llama_cpp", "llama_cpp.server"],)'}
+            $regexmatch = [Regex]::Matches($setup,$regexstr)
+            if (!($regexmatch[0].Success)) {throw 'setup.py parsing failed'}
+            $newstr = 'name="llama_cpp_python_cuda_tensorcores",' + $regexmatch[0].Groups[1].Value + $regexmatch[0].Groups[2].Value.Replace('llama_cpp','llama_cpp_cuda_tensorcores')
+            $newsetup = $regexmatch[0].Result(('$`'+$newstr+'$'''))
+            New-Item 'setup.py' -itemType File -value $newsetup -force
+            $regexstr = '(?s)(?<=name = ")llama_cpp_python(".+?packages = \[{include = ")llama_cpp(".+)'
+            $regexmatch = [Regex]::Matches($pyproject,$regexstr)
+            if (!($regexmatch[0].Success)) {throw 'pyproject.toml parsing failed'}
+            $newpyproject = $regexmatch[0].Result(('$`'+'llama_cpp_python_cuda_tensorcores'+'$1llama_cpp_cuda_tensorcores$2'))
+          } else {
+            $regexstr = '(?s)(?<=\[project]\s+?name = ")llama_cpp_python(".+?all = \[\s+?")llama_cpp_python(\[.+?wheel.packages = \[")llama_cpp("].+?input = ")llama_cpp(?=/__init__.py")'
+            $regexmatch = [Regex]::Matches($pyproject,$regexstr)
+            if (!($regexmatch[0].Success)) {throw 'pyproject.toml parsing failed'}
+            $newpyproject = $regexmatch[0].Result(('$`' + 'llama_cpp_python_cuda_tensorcores' + '$1llama_cpp_cuda_tensorcores$2' + 'llama_cpp_cuda_tensorcores$3llama_cpp_cuda_tensorcores' + '$'''))
+          }
+          Copy-Item 'llama_cpp' 'llama_cpp_cuda_tensorcores' -recurse
+          New-Item 'pyproject.toml' -itemType File -value $newpyproject -force
+          New-Item 'CMakeLists.txt' -itemType File -value $cmakelists.Replace('llama_cpp','llama_cpp_cuda_tensorcores') -force
+
+          if ($packageVersion -gt [version]'0.2.13')
+          {
+            $pyScripts = (Get-ChildItem $(Join-Path '.' 'llama_cpp_cuda_tensorcores' '*.py'))
+            $pyScripts.fullname.foreach({New-Item $_ -itemType File -value (Get-Content $_ -raw).replace('import llama_cpp.llama','from . import llama') -force})
+          }
+
+      - name: Build Wheel
+        run: |
+          $packageVersion = [version]$env:PCKGVER.TrimStart('v')
+          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
+          $env:CUDA_PATH = $env:CONDA_PREFIX
+          $env:CUDA_HOME = $env:CONDA_PREFIX
+          if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
+          $env:VERBOSE = '1'
+          $env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
+          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.71') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=35-real;37-real;52;61-real;70-real;72-real;75-real;80-real;86-real;89-real;90'}
+          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.71' -and [version]$env:CUDAVER -ge [version]'12.0') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=52;61-real;70-real;72-real;75-real;80-real;86-real;89-real;90'}
+          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.71' -and [version]$env:CUDAVER -lt [version]'11.8') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=35-real;37-real;52;61-real;70-real;72-real;75-real;80-real;86'}
+          if ($packageVersion -gt [version]'0.2.13') {$env:CMAKE_ARGS = "-DLLAMA_NATIVE=off $env:CMAKE_ARGS"}
+          if ($packageVersion -lt [version]'0.1.66') {$env:CUDAFLAGS = '-arch=all'}
+          if ($env:AVXVER -eq 'AVX') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'; $cputag = 'avx'}
+          if ($env:AVXVER -eq 'basic') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'; $cputag = 'basic'}
+          $buildtag = "+cu$cudaVersion$cputag"
+          if ($packageVersion -lt [version]'0.2.0') {
+            $env:FORCE_CMAKE = '1'
+            python -m build --wheel -C--build-option=egg_info "-C--build-option=--tag-build=$buildtag"
+          } else {
+            $initpath = Join-Path '.' 'llama_cpp_cuda_tensorcores' '__init__.py' -resolve
+            $initcontent = Get-Content $initpath -raw
+            $regexstr = '(?s)(?<=__version__ \= ")\d+(?:\.\d+)*(?=")'
+            $regexmatch = [Regex]::Matches($initcontent,$regexstr)
+            if (!($regexmatch[0].Success)) {throw '__init__.py parsing failed'}
+            $newinit = $regexmatch[0].Result(('$`' + '$&' + $buildtag + '$'''))
+            New-Item $initpath -itemType File -value $newinit -force
+            python -m build --wheel
+          }
+
+      - name: Upload files to a GitHub release
+        id: upload-release
+        uses: svenstaro/upload-release-action@2.7.0
+        continue-on-error: true
+        with:
+          file: ./dist/*.whl
+          tag: textgen-webui
+          file_glob: true
+          make_latest: false
+          overwrite: true
+
+      - uses: actions/upload-artifact@v3
+        if: steps.upload-release.outcome == 'failure'
+        with:
+          name: textgen-webui-wheels
+          path: ./dist/*.whl

From da0bcf7a277d3c90d40289ab2baa91557476ed93 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 3 Jan 2024 20:00:06 -0800
Subject: [PATCH 2/2] Fix a bug

---
Review note (ignored by git-am): points the build_textgen_tensorcores job at
build_textgen_wheels instead of build_wheels_main, the value introduced in
patch 1/2.

 .github/workflows/build-wheels-prioritized-release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-wheels-prioritized-release.yml b/.github/workflows/build-wheels-prioritized-release.yml
index 069ce7cc..21b287d3 100644
--- a/.github/workflows/build-wheels-prioritized-release.yml
+++ b/.github/workflows/build-wheels-prioritized-release.yml
@@ -69,7 +69,7 @@ jobs:
 
   build_textgen_tensorcores:
     name: Tensor Core CUDA wheels
-    needs: build_wheels_main
+    needs: build_textgen_wheels
     uses: ./.github/workflows/build-wheels-tensorcores.yml
     with:
       version: ${{ inputs.version }}