# Merge pull request #122 from janhq/unload-model-stop-background #108
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity, triggers, and workflow-wide environment variables.
name: CI

on:
  # Pushes to main or of version tags, limited to the listed paths
  # (CI scripts/workflows, build files, C/C++/CUDA sources).
  push:
    branches:
      - main
    tags:
      - 'v*.*.*'
    paths:
      - '.github/scripts/**'
      - '.github/workflows/**'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
  # Pull requests use the same path filter as pushes.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - '.github/scripts/**'
      - '.github/workflows/**'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'

env:
  # PR source branch when available, otherwise the pushed ref name.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # Model file passed to the end-to-end test scripts.
  MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
# Job graph:
#   create-draft-release -> six platform build jobs -> update_release_draft
#
# Every build job follows the same shape: check out with submodules, build,
# package build output into nitro.zip, run the platform's e2e test script
# against MODEL_URL, and (tag pushes only) attach nitro.zip to the draft
# release.
#
# NOTE(review): in the copy under review, two action references were garbled
# into "[email protected]". They are restored below as
# actions/upload-release-asset@v1.0.1 and jwlawson/actions-setup-cmake@v1.14,
# inferred from the step inputs/name. Confirm the pinned versions.
jobs:
  # Creates the draft release that the build jobs upload to.
  # Runs only when the triggering event is a push of a tag ref.
  create-draft-release:
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
    outputs:
      upload_url: ${{ steps.create_release.outputs.upload_url }}
      version: ${{ steps.get_version.outputs.version }}
    permissions:
      contents: write
    steps:
      - name: Extract tag name without v prefix
        id: get_version
        # Strips "refs/tags/v" from the ref and publishes the remainder as
        # env.VERSION (for the next step) and as the step output "version"
        # (for downstream jobs). Uses the GITHUB_ENV / GITHUB_OUTPUT files;
        # the "::set-output" workflow command is deprecated.
        run: |
          VERSION="${GITHUB_REF#refs/tags/v}"
          echo "VERSION=${VERSION}" >> "$GITHUB_ENV"
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
        env:
          GITHUB_REF: ${{ github.ref }}
      - name: Create Draft Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ github.ref_name }}
          release_name: "${{ env.VERSION }}"
          draft: true
          prerelease: false

  # Linux x86-64 build (no CUDA flag).
  ubuntu-amd64-build:
    runs-on: linux-gpu
    needs: create-draft-release
    # always() lets this job run when the release job was skipped (non-tag
    # events); a failed release job still blocks it.
    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
    permissions:
      contents: write
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      # - name: Dependencies
      #   id: depends
      #   run: |
      #     sudo apt-get update
      #     sudo apt-get install build-essential gcc-8
      - name: Build
        id: make_build
        # NOTE(review): CC=gcc-8 is set only for the make invocation, after
        # cmake has already configured the build tree. Confirm gcc-8 is the
        # compiler actually used. The same pattern recurs in the other
        # make-based jobs.
        run: |
          ./install_deps.sh
          mkdir build && cd build
          cmake -DDEBUG=ON ..
          CC=gcc-8 make -j $(nproc)
          ls -la
      - name: Package
        shell: bash
        run: |
          mkdir -p nitro
          cp build/nitro nitro/
          zip -r nitro.zip nitro
          # run e2e testing
          cd nitro
          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
      # Attach the archive to the draft release (tag pushes only).
      - uses: actions/upload-release-asset@v1.0.1
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
          asset_path: ./nitro.zip
          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-linux-amd64.zip
          asset_content_type: application/zip

  # Linux x86-64 build configured with -DLLAMA_CUBLAS=ON.
  ubuntu-amd64-cuda-build:
    runs-on: linux-gpu
    needs: create-draft-release
    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
    permissions:
      contents: write
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      # - name: Dependencies
      #   id: depends
      #   run: |
      #     sudo apt-get update
      #     sudo apt-get install build-essential gcc-8 uuid-dev
      - name: Build
        id: make_build
        run: |
          ./install_deps.sh
          mkdir build && cd build
          cmake -DDEBUG=ON -DLLAMA_CUBLAS=ON ..
          CC=gcc-8 make -j $(nproc)
          ls -la
      - name: Package
        shell: bash
        run: |
          mkdir -p nitro
          cp build/nitro nitro/
          zip -r nitro.zip nitro
          # run e2e testing
          cd nitro
          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
      # Attach the archive to the draft release (tag pushes only).
      - uses: actions/upload-release-asset@v1.0.1
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
          asset_path: ./nitro.zip
          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-linux-amd64-cuda.zip
          asset_content_type: application/zip

  # macOS build on the "mac-silicon" runner; ships ggml-metal.metal
  # alongside the binary.
  macOS-M-build:
    runs-on: mac-silicon
    needs: create-draft-release
    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
    permissions:
      contents: write
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Dependencies
        id: depends
        # Best effort: a brew failure does not fail the job.
        continue-on-error: true
        run: |
          brew update
          brew install cmake gcc@8
      - name: Build
        id: cmake_build
        # nproc is not a stock macOS command; without it "make -j" would run
        # with no job limit. Query the logical CPU count via sysctl instead.
        run: |
          ./install_deps.sh
          mkdir build && cd build
          cmake ..
          CC=gcc-8 make -j $(sysctl -n hw.logicalcpu)
          ls -la
      - name: Package
        shell: bash
        run: |
          mkdir -p nitro
          cp llama.cpp/ggml-metal.metal nitro/
          cp build/nitro nitro/
          zip -r nitro.zip nitro
          # run e2e testing
          cd nitro
          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
      # Attach the archive to the draft release (tag pushes only).
      - uses: actions/upload-release-asset@v1.0.1
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
          asset_path: ./nitro.zip
          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-arm64.zip
          asset_content_type: application/zip

  # macOS build on macos-latest, configured with -DLLAMA_METAL=OFF.
  macOS-Intel-build:
    runs-on: macos-latest
    needs: create-draft-release
    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
    permissions:
      contents: write
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Dependencies
        id: depends
        # Best effort: a brew failure does not fail the job.
        continue-on-error: true
        run: |
          brew update
      - name: Build
        id: cmake_build
        # nproc is not a stock macOS command; use sysctl for the CPU count.
        # NOTE(review): this job does not install gcc@8, yet passes
        # CC=gcc-8 to make. Confirm that is intended.
        run: |
          ./install_deps.sh
          mkdir build && cd build
          cmake -DLLAMA_METAL=OFF ..
          CC=gcc-8 make -j $(sysctl -n hw.logicalcpu)
          ls -la
      - name: Package
        shell: bash
        run: |
          mkdir -p nitro
          cp build/nitro nitro/
          zip -r nitro.zip nitro
          # run e2e testing
          cd nitro
          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
      # Attach the archive to the draft release (tag pushes only).
      - uses: actions/upload-release-asset@v1.0.1
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
          asset_path: ./nitro.zip
          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64.zip
          asset_content_type: application/zip

  # Windows x86-64 build on the hosted windows-latest runner.
  windows-amd64-build:
    runs-on: windows-latest
    needs: create-draft-release
    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
    permissions:
      contents: write
    # NOTE(review): neither these variables nor matrix.defines are referenced
    # by any step in this job. Confirm whether they are still needed.
    env:
      OPENBLAS_VERSION: '0.3.23'
      OPENCL_VERSION: '2023.04.17'
      CLBLAST_VERSION: '1.6.0'
    strategy:
      matrix:
        include:
          - build: 'normal'
            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Setup VSWhere.exe
        uses: warrenbuckley/Setup-VSWhere@v1
        with:
          version: latest
          silent: true
        env:
          # NOTE(review): re-enables workflow commands that GitHub disabled
          # for security reasons. Confirm this action still requires it.
          ACTIONS_ALLOW_UNSECURE_COMMANDS: true
      - name: actions-setup-cmake
        uses: jwlawson/actions-setup-cmake@v1.14
      - name: Build
        id: cmake_build
        shell: cmd
        # Builds the bundled dependencies first, then nitro itself.
        # cmd's mkdir has no -p flag ("mkdir -p build" would also create a
        # directory literally named "-p"), so guard with "if not exist".
        run: |
          cmake -S ./nitro_deps -B ./build_deps/nitro_deps
          cmake --build ./build_deps/nitro_deps --config Release
          if not exist build mkdir build
          cd build
          cmake ..
          cmake --build . --config Release -j 4
      - name: Pack artifacts
        id: pack_artifacts
        shell: cmd
        # Copies zlib.dll and llama.dll next to the build output, zips it,
        # then runs the Windows e2e script from inside build\Release.
        run: |
          robocopy build_deps\_install\bin .\build\Release zlib.dll
          robocopy build\bin\Release .\build\Release llama.dll
          7z a nitro.zip .\build\Release\*
          cd .\build\Release
          ..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
      # Attach the archive to the draft release (tag pushes only).
      - uses: actions/upload-release-asset@v1.0.1
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
          asset_path: ./nitro.zip
          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64.zip
          asset_content_type: application/zip

  # Windows x86-64 build configured with -DLLAMA_CUBLAS=ON on the
  # "windows-nvidia" runner.
  windows-amd64-cuda-build:
    runs-on: windows-nvidia
    needs: create-draft-release
    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
    permissions:
      contents: write
    # NOTE(review): the matrix values are not referenced by any step in this
    # job. Confirm whether they are still needed.
    strategy:
      matrix:
        cuda: ['12.2.0']
        build: ['cublas']
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Setup VSWhere.exe
        uses: warrenbuckley/Setup-VSWhere@v1
        with:
          version: latest
          silent: true
        env:
          # NOTE(review): re-enables workflow commands that GitHub disabled
          # for security reasons. Confirm this action still requires it.
          ACTIONS_ALLOW_UNSECURE_COMMANDS: true
      - name: Build
        id: cmake_build
        shell: cmd
        # Builds the bundled dependencies first, then nitro with cuBLAS.
        # cmd's mkdir has no -p flag ("mkdir -p build" would also create a
        # directory literally named "-p"), so guard with "if not exist".
        run: |
          cmake -S ./nitro_deps -B ./build_deps/nitro_deps
          cmake --build ./build_deps/nitro_deps --config Release
          if not exist build mkdir build
          cd build
          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON
          cmake --build . --config Release -j 4
      - name: Pack artifacts
        id: pack_artifacts
        shell: cmd
        # Appends the 7-Zip install directory to PATH before calling 7z.
        run: |
          set PATH=%PATH%;C:\Program Files\7-Zip\
          echo %PATH%
          robocopy build_deps\_install\bin .\build\Release zlib.dll
          robocopy build\bin\Release .\build\Release llama.dll
          7z a nitro.zip .\build\Release\*
          cd .\build\Release
          ..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
      # Attach the archive to the draft release (tag pushes only).
      - uses: actions/upload-release-asset@v1.0.1
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
          asset_path: ./nitro.zip
          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-cuda.zip
          asset_content_type: application/zip

  # Runs release-drafter once every platform build has finished.
  update_release_draft:
    needs:
      - ubuntu-amd64-build
      - ubuntu-amd64-cuda-build
      - macOS-M-build
      - macOS-Intel-build
      - windows-amd64-build
      - windows-amd64-cuda-build
    permissions:
      contents: write
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: release-drafter/release-drafter@v5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}