From 3b6aa9f24ac22170dec563e0566a57b2dc0b97fe Mon Sep 17 00:00:00 2001 From: sangjanai Date: Wed, 14 May 2025 13:24:50 +0700 Subject: [PATCH 1/5] test: logit_bias --- .../workflows/template-quality-gate-pr.yml | 400 +++++++++--------- src/llama_engine.cc | 8 + src/llama_server_context.h | 5 +- 3 files changed, 211 insertions(+), 202 deletions(-) diff --git a/.github/workflows/template-quality-gate-pr.yml b/.github/workflows/template-quality-gate-pr.yml index 50fad65..5f4ce8b 100644 --- a/.github/workflows/template-quality-gate-pr.yml +++ b/.github/workflows/template-quality-gate-pr.yml @@ -36,110 +36,110 @@ jobs: # vulkan: false # ccache: true # ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx2" - runs-on: "ubuntu-20-04" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: true - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-noavx" - runs-on: "ubuntu-20-04" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx" - runs-on: "ubuntu-20-04" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx512" - runs-on: "ubuntu-20-04" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-vulkan" - runs-on: "ubuntu-22-04" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_VULKAN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: true - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-noavx-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx2-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx512-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-noavx-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx2-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" - - os: "linux" - name: "amd64-avx512-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx2" + # runs-on: "ubuntu-20-04" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: true + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-noavx" + # runs-on: "ubuntu-20-04" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx" + # runs-on: "ubuntu-20-04" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx512" + # runs-on: "ubuntu-20-04" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-vulkan" + # runs-on: "ubuntu-22-04" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_VULKAN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: true + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-noavx-cuda-11-7" + # runs-on: "ubuntu-20-04-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx2-cuda-11-7" + # runs-on: "ubuntu-20-04-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx-cuda-11-7" + # runs-on: "ubuntu-20-04-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx512-cuda-11-7" + # runs-on: "ubuntu-20-04-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-noavx-cuda-12-0" + # runs-on: "ubuntu-20-04-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx2-cuda-12-0" + # runs-on: "ubuntu-20-04-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx-cuda-12-0" + # runs-on: "ubuntu-20-04-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" + # - os: "linux" + # name: "amd64-avx512-cuda-12-0" + # runs-on: "ubuntu-20-04-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: "/home/runner/.ccache" - os: "mac" name: "amd64" runs-on: "macos-selfhosted-12" @@ -164,102 +164,102 @@ jobs: vulkan: false ccache: false ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-noavx" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" - run-e2e: false - vulkan: false - ccache: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" - run-e2e: true - vulkan: false - ccache: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx512" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" - run-e2e: false - vulkan: false - ccache: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-vulkan" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" - run-e2e: false - vulkan: true - ccache: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-noavx-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx2-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx512-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-noavx-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx2-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx512-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - vulkan: false - ccache: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-noavx" + # runs-on: "windows-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" + # run-e2e: false + # vulkan: false + # ccache: false + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-avx" + # runs-on: "windows-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" + # run-e2e: true + # vulkan: false + # ccache: false + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-avx512" + # runs-on: "windows-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" + # run-e2e: false + # vulkan: false + # ccache: false + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-vulkan" + # runs-on: "windows-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" + # run-e2e: false + # vulkan: true + # ccache: false + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-noavx-cuda-12-0" + # runs-on: "windows-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-avx2-cuda-12-0" + # runs-on: "windows-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-avx-cuda-12-0" + # runs-on: "windows-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-avx512-cuda-12-0" + # runs-on: "windows-cuda-12-0" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-noavx-cuda-11-7" + # runs-on: "windows-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-avx2-cuda-11-7" + # runs-on: "windows-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-avx-cuda-11-7" + # runs-on: "windows-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + # - os: "windows" + # name: "amd64-avx512-cuda-11-7" + # runs-on: "windows-cuda-11-7" + # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + # run-e2e: false + # vulkan: false + # ccache: true + # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' steps: - name: Clone diff --git a/src/llama_engine.cc b/src/llama_engine.cc index f1326df..e923a2c 100644 --- a/src/llama_engine.cc +++ b/src/llama_engine.cc @@ -5,6 +5,7 @@ #include #include "json-schema-to-grammar.h" #include "json/writer.h" +#include "src/llama-arch.h" #include "llama_utils.h" #include "trantor/utils/Logger.h" @@ -805,6 +806,13 @@ void LlamaEngine::HandleInferenceImpl( for (const auto& elem : completion.logit_bias) { arr.push_back(llama::inferences::ConvertJsonCppToNlohmann(elem)); } + + if (si.ctx.model->arch == LLM_ARCH_QWEN3) { + json qwen3 = json::array(); + qwen3.push_back("151643"); + qwen3.push_back("-100000"); + arr.push_back(qwen3); + } data["logit_bias"] = std::move(arr); int n_probs = completion.n_probs; const Json::Value& messages = completion.messages; diff --git a/src/llama_server_context.h b/src/llama_server_context.h index 05c581b..16b1cb6 100644 --- a/src/llama_server_context.h +++ b/src/llama_server_context.h @@ -11,6 +11,7 @@ // External +#include "src/llama-model.h" #include "llama_client_slot.h" #if defined(_WIN32) @@ -107,11 +108,11 @@ static T json_value(const json& body, const std::string& key, struct LlamaServerContext { common_init_result llama_init; - + llama_model* model = nullptr; llama_context* ctx = nullptr; - const llama_vocab * vocab = nullptr; + const llama_vocab* vocab = nullptr; clip_ctx* clp_ctx = nullptr; From 3c76c2aa063d4b7a84c57285c17266c16195a90c Mon Sep 17 00:00:00 2001 From: sangjanai Date: Wed, 14 May 2025 13:48:24 +0700 Subject: [PATCH 2/5] chore: add log --- src/llama_engine.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama_engine.cc b/src/llama_engine.cc index e923a2c..8a5cf19 100644 --- a/src/llama_engine.cc +++ b/src/llama_engine.cc @@ -945,7 +945,7 @@ void LlamaEngine::HandleInferenceImpl( #endif if (is_streamed) { - LOG_INFO << "Request " << request_id << ": " + LOG_INFO << "Request " << request_id << ", data: " << data.dump() << "Streamed, waiting for respone"; auto state = CreateInferenceState(si.ctx); auto model_id = completion.model_id; From a612a6ad17e581499828a4c2ea2dbd62969b5655 Mon Sep 17 00:00:00 2001 From: sangjanai Date: Wed, 14 May 2025 13:56:38 +0700 Subject: [PATCH 3/5] fix: use number --- src/llama_engine.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llama_engine.cc b/src/llama_engine.cc index 8a5cf19..4ae700b 100644 --- a/src/llama_engine.cc +++ b/src/llama_engine.cc @@ -809,8 +809,8 @@ void LlamaEngine::HandleInferenceImpl( if (si.ctx.model->arch == LLM_ARCH_QWEN3) { json qwen3 = json::array(); - qwen3.push_back("151643"); - qwen3.push_back("-100000"); + qwen3.push_back(151643); + qwen3.push_back(-100000); arr.push_back(qwen3); } data["logit_bias"] = std::move(arr); From e8e0c314d5e8cefab1e5a14819e09c9d46296d53 Mon Sep 17 00:00:00 2001 From: sangjanai Date: Wed, 14 May 2025 14:24:43 +0700 Subject: [PATCH 4/5] chore: rm log --- src/llama_engine.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llama_engine.cc b/src/llama_engine.cc index 4ae700b..6e4de49 100644 --- a/src/llama_engine.cc +++ b/src/llama_engine.cc @@ -5,8 +5,8 @@ #include #include "json-schema-to-grammar.h" #include "json/writer.h" -#include "src/llama-arch.h" #include "llama_utils.h" +#include "src/llama-arch.h" #include "trantor/utils/Logger.h" #if defined(_WIN32) @@ -945,7 +945,7 @@ void LlamaEngine::HandleInferenceImpl( #endif if (is_streamed) { - LOG_INFO << "Request " << request_id << ", data: " << data.dump() + LOG_INFO << "Request " << request_id << ": " << "Streamed, waiting for respone"; auto state = CreateInferenceState(si.ctx); auto model_id = completion.model_id; From f30c89213c6b11acf8caf831a4023630cd00e5b9 Mon Sep 17 00:00:00 2001 From: sangjanai Date: Wed, 14 May 2025 15:03:52 +0700 Subject: [PATCH 5/5] chore: add linux and windows builds back --- .../workflows/template-quality-gate-pr.yml | 400 +++++++++--------- 1 file changed, 200 insertions(+), 200 deletions(-) diff --git a/.github/workflows/template-quality-gate-pr.yml b/.github/workflows/template-quality-gate-pr.yml index 5f4ce8b..50fad65 100644 --- a/.github/workflows/template-quality-gate-pr.yml +++ b/.github/workflows/template-quality-gate-pr.yml @@ -36,110 +36,110 @@ jobs: # vulkan: false # ccache: true # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx2" - # runs-on: "ubuntu-20-04" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: true - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-noavx" - # runs-on: "ubuntu-20-04" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx" - # runs-on: "ubuntu-20-04" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx512" - # runs-on: "ubuntu-20-04" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-vulkan" - # runs-on: "ubuntu-22-04" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_VULKAN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: true - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-noavx-cuda-11-7" - # runs-on: "ubuntu-20-04-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx2-cuda-11-7" - # runs-on: "ubuntu-20-04-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx-cuda-11-7" - # runs-on: "ubuntu-20-04-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx512-cuda-11-7" - # runs-on: "ubuntu-20-04-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-noavx-cuda-12-0" - # runs-on: "ubuntu-20-04-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx2-cuda-12-0" - # runs-on: "ubuntu-20-04-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx-cuda-12-0" - # runs-on: "ubuntu-20-04-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" - # - os: "linux" - # name: "amd64-avx512-cuda-12-0" - # runs-on: "ubuntu-20-04-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx2" + runs-on: "ubuntu-20-04" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: true + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-noavx" + runs-on: "ubuntu-20-04" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx" + runs-on: "ubuntu-20-04" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx512" + runs-on: "ubuntu-20-04" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-vulkan" + runs-on: "ubuntu-22-04" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_VULKAN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: true + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-noavx-cuda-11-7" + runs-on: "ubuntu-20-04-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx2-cuda-11-7" + runs-on: "ubuntu-20-04-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx-cuda-11-7" + runs-on: "ubuntu-20-04-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx512-cuda-11-7" + runs-on: "ubuntu-20-04-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-noavx-cuda-12-0" + runs-on: "ubuntu-20-04-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx2-cuda-12-0" + runs-on: "ubuntu-20-04-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx-cuda-12-0" + runs-on: "ubuntu-20-04-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "amd64-avx512-cuda-12-0" + runs-on: "ubuntu-20-04-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" - os: "mac" name: "amd64" runs-on: "macos-selfhosted-12" @@ -164,102 +164,102 @@ jobs: vulkan: false ccache: false ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-noavx" - # runs-on: "windows-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" - # run-e2e: false - # vulkan: false - # ccache: false - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-avx" - # runs-on: "windows-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" - # run-e2e: true - # vulkan: false - # ccache: false - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-avx512" - # runs-on: "windows-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" - # run-e2e: false - # vulkan: false - # ccache: false - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-vulkan" - # runs-on: "windows-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" - # run-e2e: false - # vulkan: true - # ccache: false - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-noavx-cuda-12-0" - # runs-on: "windows-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-avx2-cuda-12-0" - # runs-on: "windows-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-avx-cuda-12-0" - # runs-on: "windows-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-avx512-cuda-12-0" - # runs-on: "windows-cuda-12-0" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-noavx-cuda-11-7" - # runs-on: "windows-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-avx2-cuda-11-7" - # runs-on: "windows-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-avx-cuda-11-7" - # runs-on: "windows-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - # - os: "windows" - # name: "amd64-avx512-cuda-11-7" - # runs-on: "windows-cuda-11-7" - # cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - # run-e2e: false - # vulkan: false - # ccache: true - # ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-noavx" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" + run-e2e: false + vulkan: false + ccache: false + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-avx" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" + run-e2e: true + vulkan: false + ccache: false + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-avx512" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" + run-e2e: false + vulkan: false + ccache: false + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-vulkan" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja" + run-e2e: false + vulkan: true + ccache: false + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-noavx-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-avx2-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-avx-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-avx512-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-noavx-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-avx2-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-avx-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "windows" + name: "amd64-avx512-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' steps: - name: Clone