diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 17daaa12a..2cbbd24d6 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -2,179 +2,85 @@ name: Build Wheels (CUDA)
 
 on: workflow_dispatch
 
-permissions:
-  contents: write
-
 jobs:
-  define_matrix:
-    name: Define Build Matrix
-    runs-on: ubuntu-22.04
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-    defaults:
-      run:
-        shell: pwsh
-
-    steps:
-      - name: Define Job Output
-        id: set-matrix
-        run: |
-          $matrix = @{
-              'os' = @('ubuntu-22.04') #, 'windows-2022')
-              'pyver' = @("3.9", "3.10", "3.11", "3.12")
-              'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
-              'releasetag' = @("basic")
-          }
-
-          $matrixOut = ConvertTo-Json $matrix -Compress
-          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
-
   build_wheels:
-    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
-    needs: define_matrix
-    runs-on: ${{ matrix.os }}
+    name: Build Wheel ${{ matrix.runtime.os }} ${{ matrix.cuda.version }}
+    runs-on: ${{ matrix.runtime.os }}
+    # Ubuntu runners build inside the nvidia/cuda devel image for the selected
+    # toolkit version; for any other runner the expression yields null.
+    container: ${{ case(startsWith(matrix.runtime.os, 'ubuntu'), format('nvidia/cuda:{0}-devel-rockylinux9', matrix.cuda.version), null) }}
     strategy:
-      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
-    defaults:
-      run:
-        shell: pwsh
-    env:
-      CUDAVER: ${{ matrix.cuda }}
-      AVXVER: ${{ matrix.releasetag }}
+      fail-fast: false
+      matrix:
+        runtime:
+          - os: ubuntu-latest
+            repair-command: "uvx --with patchelf auditwheel repair -w wheelhouse/ dist/*.whl"
+          - os: ubuntu-24.04-arm
+            repair-command: "uvx --with patchelf auditwheel repair -w wheelhouse/ dist/*.whl"
+          - os: windows-latest
+            repair-command: "uvx delvewheel repair -w wheelhouse/ dist/*.whl"
+        cuda:
+          - version: "12.6.0"
+            tag: "126"
+          - version: "12.8.0"
+            tag: "128"
+          - version: "13.0.0"
+            tag: "130"
+
+    permissions:
+      contents: write
 
     steps:
-      - name: Add MSBuild to PATH
-        if: runner.os == 'Windows'
-        uses: microsoft/setup-msbuild@v2
-        with:
-          vs-version: '[16.11,16.12)'
+      - name: Install dependencies (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          dnf group install -y "Development Tools"
+          dnf install -y openssl-devel
 
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: "recursive"
 
-      - uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.pyver }}
-          cache: 'pip'
-
-      - name: Setup Mamba
-        uses: conda-incubator/setup-miniconda@v3.1.0
-        with:
-          activate-environment: "llamacpp"
-          python-version: ${{ matrix.pyver }}
-          miniforge-version: latest
-          add-pip-as-python-dependency: true
-          auto-activate-base: false
+      - uses: astral-sh/setup-uv@v7
 
-      - name: VS Integration Cache
-        id: vs-integration-cache
+      - name: Setup CUDA (Windows)
+        uses: Jimver/cuda-toolkit@v0.2.35
         if: runner.os == 'Windows'
-        uses: actions/cache@v4
         with:
-          path: ./MSBuildExtensions
-          key: cuda-${{ matrix.cuda }}-vs-integration
+          cuda: ${{ matrix.cuda.version }}
+          use-local-cache: false
+          use-github-cache: false
+          log-file-suffix: ${{ matrix.cuda.version }}
 
-      - name: Get Visual Studio Integration
-        if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
+      - name: Extend $LD_LIBRARY_PATH (Linux)
+        if: runner.os == 'Linux'
         run: |
-          if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
-          $links = (Invoke-RestMethod 'https://raw.githubusercontent.com/Jimver/cuda-toolkit/master/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
-          for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
-          Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
-          & 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
-          Remove-Item 'cudainstaller.zip'
+          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/compat" >> $GITHUB_ENV
 
-      - name: Install Visual Studio Integration
-        if: runner.os == 'Windows'
-        run: |
-          $y = (gi '.\MSBuildExtensions').fullname + '\*'
-          (gi 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
-          $cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
-          echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
-
-      - name: Install Dependencies
+      - name: Build wheel
         env:
-          MAMBA_DOWNLOAD_FAILFAST: "0"
-          MAMBA_NO_LOW_SPEED_LIMIT: "1"
-        run: |
-          $cudaVersion = $env:CUDAVER
-          $cudaChannel = "nvidia/label/cuda-$cudaVersion"
-          if ($IsLinux) {
-            # Keep nvcc, cudart, and headers on the same NVIDIA label so the
-            # detected toolkit version matches the published wheel tag.
-            mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "$cudaChannel::cuda-toolkit=$cudaVersion" "$cudaChannel::cuda-nvcc_linux-64=$cudaVersion" "$cudaChannel::cuda-cudart" "$cudaChannel::cuda-cudart-dev"
-          } else {
-            mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "$cudaChannel::cuda-toolkit=$cudaVersion"
-          }
-          if ($LASTEXITCODE -ne 0) {
-            exit $LASTEXITCODE
-          }
-          python -m pip install build wheel
+          CMAKE_ARGS: "-DGGML_CUDA=ON -DGGML_NATIVE=OFF"
+          CMAKE_BUILD_PARALLEL_LEVEL: "8"
+        run: uv build --wheel
 
-      - name: Build Wheel
+      - name: Repair wheel
+        # The repair command passes the glob dist/*.whl, so run it under bash
+        # on every runner: the Windows default shell (PowerShell) does not
+        # expand wildcards for native commands.
+        shell: bash
         run: |
-          $env:CUDA_PATH = $env:CONDA_PREFIX
-          $env:CUDA_HOME = $env:CONDA_PREFIX
-          $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
-          $cudaHostCompilerArg = ''
-          $env:CMAKE_ARGS = ''
-          if ($IsLinux) {
-            if (Test-Path '/usr/bin/g++-12') {
-              $env:CC = '/usr/bin/gcc-12'
-              $env:CXX = '/usr/bin/g++-12'
-              $env:CUDAHOSTCXX = '/usr/bin/g++-12'
-              $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
-            }
-            if (Test-Path (Join-Path $env:CONDA_PREFIX 'include/cuda_runtime.h')) {
-              $env:CUDAToolkit_ROOT = $env:CONDA_PREFIX
-              $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
-              $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$env:CONDA_PREFIX -DCUDA_TOOLKIT_ROOT_DIR=$env:CONDA_PREFIX$cudaHostCompilerArg"
-              $env:CPATH = "$env:CONDA_PREFIX/include:$env:CPATH"
-              $env:CPLUS_INCLUDE_PATH = "$env:CONDA_PREFIX/include:$env:CPLUS_INCLUDE_PATH"
-              $env:LIBRARY_PATH = "$env:CONDA_PREFIX/lib:$env:LIBRARY_PATH"
-              $env:LD_LIBRARY_PATH = "$env:CONDA_PREFIX/lib:$env:LD_LIBRARY_PATH"
-            } else {
-              $env:CMAKE_ARGS = $cudaHostCompilerArg.Trim()
-            }
-          }
-          $nvccPath = Join-Path $env:CONDA_PREFIX 'bin/nvcc'
-          if (-not (Test-Path $nvccPath)) {
-            $nvccPath = Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/bin/nvcc'
-          }
-          if (-not (Test-Path $nvccPath)) {
-            throw 'Failed to find nvcc in the conda environment'
-          }
-          $env:CUDACXX = $nvccPath
-          $env:PATH = "$(Split-Path $nvccPath):$env:PATH"
-          $nvccVersion = ((& $nvccPath --version) | Select-String 'release ([0-9]+\.[0-9]+)').Matches[0].Groups[1].Value
-          if (-not $nvccVersion) {
-            throw 'Failed to detect the installed CUDA toolkit version'
-          }
-          $cudaTagVersion = $nvccVersion.Replace('.','')
-          $env:VERBOSE = '1'
-          # Build real cubins for the supported GPUs, including sm_70, and keep
-          # one forward-compatible PTX target instead of embedding PTX for every
-          # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
-          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=--allow-unsupported-compiler $env:CMAKE_ARGS"
-          # if ($env:AVXVER -eq 'AVX') {
-          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
-          # }
-          # if ($env:AVXVER -eq 'AVX512') {
-          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX512=on'
-          # }
-          # if ($env:AVXVER -eq 'basic') {
-          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
-          # }
-          python -m build --wheel
-          # Publish tags that reflect the actual installed toolkit version.
-          Write-Output "CUDA_VERSION=$cudaTagVersion" >> $env:GITHUB_ENV
+          ${{ matrix.runtime.repair-command }}
+          ls wheelhouse/
+
+      - uses: actions/upload-artifact@v7
+        with:
+          name: ${{ matrix.runtime.os }}-cu${{ matrix.cuda.tag }}
+          path: wheelhouse/
 
       - uses: softprops/action-gh-release@v2
         if: startsWith(github.ref, 'refs/tags/')
         with:
-          files: dist/*
-          # Set tag_name to -cu
-          tag_name: ${{ github.ref_name }}-cu${{ env.CUDA_VERSION }}
+          files: wheelhouse/*
+          tag_name: ${{ github.ref_name }}-cu${{ matrix.cuda.tag }}
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}