Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
9f38d9a
Updated to `1c5eba6f8e628fb0a98afb27d8aaeb3b0e136451`
martindevans Jun 30, 2024
07be044
Imported binaries from https://github.com/SciSharp/LLamaSharp/actions…
martindevans Jul 1, 2024
8decf37
Update CUDA paths from cu12.1.0 to cu12.2.0
m0nsky Jun 19, 2024
7eaef88
Enabled GGML LTO
martindevans Jul 1, 2024
9ef7eff
Uploading ggml.dll for linux and windows
martindevans Jul 1, 2024
77bb064
Fixed AVX defines from LLAMA_* to GGML_*
martindevans Jul 1, 2024
070b700
Disabled GGML_LTO
martindevans Jul 1, 2024
01c5925
Add ggml dynamic libraries (OSX)
SignalRT Jul 1, 2024
3236d5c
Delete ggml binaries
SignalRT Jul 1, 2024
65ace68
Add ggml libraries OSX
SignalRT Jul 1, 2024
568361d
Fix ggml reference
SignalRT Jul 1, 2024
7ce108b
Upload ggml.dll for Windows, rearrange files in final deps.zip
martindevans Jul 1, 2024
eeb2266
Removed deps folder and ignored it. the `dll` files are now too large…
martindevans Jul 2, 2024
032903c
Added msbuild action to LLamaSharp.csproj which downloads binaries on…
martindevans Jul 4, 2024
4088b2f
Updated csproj to download from "binary release" repo (https://github…
martindevans Jul 8, 2024
ef1b18e
Removed unintentionally duplicated part of csproj
martindevans Jul 8, 2024
14aca41
Spelling fix
martindevans Jul 8, 2024
ccbf40d
Added proper handling of ggml.so for all linux targets
martindevans Jul 8, 2024
5f9b2d2
Fixed path for Linux+CUDA ggml.so
martindevans Jul 8, 2024
eb3e61b
fixed final paths for shared objects
martindevans Jul 8, 2024
0453265
Fixed ggml.so to libggml.so
martindevans Jul 8, 2024
92b14b6
Commit to trigger CI
martindevans Jul 9, 2024
ea0b4ec
Switched some build defines from LLAMA to GGML
martindevans Jul 9, 2024
5bce444
Added libggml.so to LLamaSharp.Runtime.targets
martindevans Jul 9, 2024
ffcfda2
Fixed grammar test
martindevans Jul 9, 2024
81c4230
Include ggml in nuget packages
SignalRT Jul 9, 2024
d4fadf9
Fix possible file locks due to race conditions during build & test
m0nsky Jul 10, 2024
3beb3dc
Merge pull request #836 from m0nsky/july-2024-binaries
martindevans Jul 10, 2024
33020e8
Manually resolving the GGML dependency on Linux/OSX.
m0nsky Jul 10, 2024
28e986f
Unify linux/OSX logic, and don't use full/absolute path
m0nsky Jul 10, 2024
ff3736e
Logging comment
m0nsky Jul 10, 2024
3234a33
Fix inverted if statement
m0nsky Jul 10, 2024
705efa9
Merge pull request #842 from m0nsky/ggml-dependency-linux-osx
martindevans Jul 10, 2024
fc6d4dd
Updated to pull from new source
martindevans Jul 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 120 additions & 46 deletions .github/workflows/compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ jobs:
matrix:
include:
- build: 'noavx'
defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
defines: '-DGGML_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
defines: '-DGGML_AVX512=ON'
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
Expand All @@ -54,6 +54,11 @@ jobs:
path: ./build/src/libllama.so
name: llama-bin-linux-${{ matrix.build }}-x64.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/ggml/src/libggml.so
name: ggml-bin-linux-${{ matrix.build }}-x64.so
if-no-files-found: error
- name: Upload Llava
uses: actions/upload-artifact@v4
with:
Expand All @@ -68,13 +73,13 @@ jobs:
matrix:
include:
- build: 'noavx'
defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
defines: '-DGGML_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON'
defines: '-DGGML_AVX512=ON -DGGML_AVX512_VBMI=ON -DGGML_AVX512_VNNI=ON'
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
Expand All @@ -90,15 +95,22 @@ jobs:
cd build
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
tree /f

- name: Upload artifacts
- name: Upload artifacts (llama)
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\llama.dll
name: llama-bin-win-${{ matrix.build }}-x64.dll
if-no-files-found: error
- name: Upload artifacts (ggml)
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\ggml.dll
name: ggml-bin-win-${{ matrix.build }}-x64.dll
if-no-files-found: error

- name: Upload Llava
- name: Upload artifacts (llava)
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\llava_shared.dll
Expand Down Expand Up @@ -147,15 +159,15 @@ jobs:
run: |
mkdir build
cd build
cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_VULKAN=ON
cmake .. ${{ env.COMMON_DEFINE }} -DGGML_VULKAN=ON
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
ls -R
- name: Build
if: ${{ matrix.os == 'ubuntu-22.04' }}
run: |
mkdir build
cd build
cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_VULKAN=ON
cmake .. ${{ env.COMMON_DEFINE }} -DGGML_VULKAN=ON
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
ls -R
- name: Upload llama artifacts (Windows)
Expand All @@ -165,6 +177,13 @@ jobs:
path: .\build\bin\Release\llama.dll
name: llama-bin-win-vulkan-x64.dll
if-no-files-found: error
- name: Upload ggml artifacts (Windows)
if: ${{ matrix.os == 'windows-latest' }}
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\ggml.dll
name: ggml-bin-win-vulkan-x64.dll
if-no-files-found: error
- name: Upload llava artifacts (Windows)
if: ${{ matrix.os == 'windows-latest' }}
uses: actions/upload-artifact@v4
Expand All @@ -179,6 +198,13 @@ jobs:
path: ./build/src/libllama.so
name: llama-bin-linux-vulkan-x64.so
if-no-files-found: error
- name: Upload ggml artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-22.04' }}
uses: actions/upload-artifact@v4
with:
path: ./build/ggml/src/libggml.so
name: ggml-bin-linux-vulkan-x64.so
if-no-files-found: error
- name: Upload llava artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-22.04' }}
uses: actions/upload-artifact@v4
Expand Down Expand Up @@ -236,6 +262,13 @@ jobs:
path: .\build\bin\Release\llama.dll
name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
if-no-files-found: error
- name: Upload artifacts (ggml)
if: ${{ matrix.os == 'windows-2019' }}
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\ggml.dll
name: ggml-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
if-no-files-found: error
- name: Upload llava artifacts (Windows)
if: ${{ matrix.os == 'windows-2019' }}
uses: actions/upload-artifact@v4
Expand All @@ -250,6 +283,13 @@ jobs:
path: ./build/src/libllama.so
name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
if-no-files-found: error
- name: Upload ggml artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-20.04' }}
uses: actions/upload-artifact@v4
with:
path: ./build/ggml/src/libggml.so
name: ggml-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
if-no-files-found: error
- name: Upload llava artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-20.04' }}
uses: actions/upload-artifact@v4
Expand All @@ -268,9 +308,9 @@ jobs:
- build: 'arm64'
defines: '-DCMAKE_OSX_ARCHITECTURES=arm64 -DGGML_METAL_EMBED_LIBRARY=ON'
- build: 'x64'
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON'
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=ON -DGGML_AVX2=ON'
- build: 'x64-rosetta2'
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF'
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF'
runs-on: macos-latest
steps:
- uses: actions/checkout@v4
Expand All @@ -289,7 +329,13 @@ jobs:
cd build
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
- name: Upload artifacts
- name: Upload ggml
uses: actions/upload-artifact@v4
with:
path: ./build/ggml/src/libggml.dylib
name: ggml-bin-osx-${{ matrix.build }}.dylib
if-no-files-found: error
- name: Upload llama
uses: actions/upload-artifact@v4
with:
path: ./build/src/libllama.dylib
Expand Down Expand Up @@ -331,54 +377,81 @@ jobs:
# Make all directories at once
mkdir --parents deps/{avx,avx2,avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan}

cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so deps/libllama.so
cp artifacts/llama-bin-linux-avx-x64.so/libllama.so deps/avx/libllama.so
cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so

cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/llama.dll
cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/llama.dll
cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/llama.dll
cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll

# Linux
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/libggml.so
cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so deps/libllama.so
cp artifacts/llava-bin-linux-noavx-x64.so/libllava_shared.so deps/libllava_shared.so

cp artifacts/ggml-bin-linux-avx-x64.so/libggml.so deps/avx/libggml.so
cp artifacts/llama-bin-linux-avx-x64.so/libllama.so deps/avx/libllama.so
cp artifacts/llava-bin-linux-avx-x64.so/libllava_shared.so deps/avx/libllava_shared.so

cp artifacts/ggml-bin-linux-avx2-x64.so/libggml.so deps/avx2/libggml.so
cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
cp artifacts/llava-bin-linux-avx2-x64.so/libllava_shared.so deps/avx2/libllava_shared.so

cp artifacts/ggml-bin-linux-avx512-x64.so/libggml.so deps/avx512/libggml.so
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so

cp artifacts/llava-bin-win-noavx-x64.dll/llava_shared.dll deps/llava_shared.dll
cp artifacts/llava-bin-win-avx-x64.dll/llava_shared.dll deps/avx/llava_shared.dll
cp artifacts/llava-bin-win-avx2-x64.dll/llava_shared.dll deps/avx2/llava_shared.dll
cp artifacts/llava-bin-win-avx512-x64.dll/llava_shared.dll deps/avx512/llava_shared.dll
# Windows
cp artifacts/ggml-bin-win-noavx-x64.dll/ggml.dll deps/ggml.dll
cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/llama.dll
cp artifacts/llava-bin-win-noavx-x64.dll/llava_shared.dll deps/llava_shared.dll

cp artifacts/ggml-bin-win-avx-x64.dll/ggml.dll deps/avx/ggml.dll
cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/llama.dll
cp artifacts/llava-bin-win-avx-x64.dll/llava_shared.dll deps/avx/llava_shared.dll

cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
cp artifacts/llava-bin-osx-arm64.dylib/libllava_shared.dylib deps/osx-arm64/libllava_shared.dylib
cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
cp artifacts/ggml-bin-win-avx2-x64.dll/ggml.dll deps/avx2/ggml.dll
cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/llama.dll
cp artifacts/llava-bin-win-avx2-x64.dll/llava_shared.dll deps/avx2/llava_shared.dll

cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
cp artifacts/llava-bin-osx-x64.dylib/libllava_shared.dylib deps/osx-x64/libllava_shared.dylib
cp artifacts/ggml-bin-win-avx512-x64.dll/ggml.dll deps/avx512/ggml.dll
cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll
cp artifacts/llava-bin-win-avx512-x64.dll/llava_shared.dll deps/avx512/llava_shared.dll

cp artifacts/llama-bin-osx-x64-rosetta2.dylib/libllama.dylib deps/osx-x64-rosetta2/libllama.dylib
# macOS
cp artifacts/ggml-bin-osx-arm64.dylib/libggml.dylib deps/osx-arm64/libggml.dylib
cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
cp artifacts/llava-bin-osx-arm64.dylib/libllava_shared.dylib deps/osx-arm64/libllava_shared.dylib
cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal

cp artifacts/ggml-bin-osx-x64.dylib/libggml.dylib deps/osx-x64/libggml.dylib
cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
cp artifacts/llava-bin-osx-x64.dylib/libllava_shared.dylib deps/osx-x64/libllava_shared.dylib

cp artifacts/ggml-bin-osx-x64-rosetta2.dylib/libggml.dylib deps/osx-x64-rosetta2/libggml.dylib
cp artifacts/llama-bin-osx-x64-rosetta2.dylib/libllama.dylib deps/osx-x64-rosetta2/libllama.dylib
cp artifacts/llava-bin-osx-x64-rosetta2.dylib/libllava_shared.dylib deps/osx-x64-rosetta2/libllava_shared.dylib

cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/llama.dll
cp artifacts/llava-bin-win-cublas-cu11.7.1-x64.dll/llava_shared.dll deps/cu11.7.1/llava_shared.dll

cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
cp artifacts/llava-bin-linux-cublas-cu11.7.1-x64.so/libllava_shared.so deps/cu11.7.1/libllava_shared.so
# Windows CUDA
cp artifacts/ggml-bin-win-cublas-cu11.7.1-x64.dll/ggml.dll deps/cu11.7.1/ggml.dll
cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/llama.dll
cp artifacts/llava-bin-win-cublas-cu11.7.1-x64.dll/llava_shared.dll deps/cu11.7.1/llava_shared.dll

cp artifacts/llama-bin-win-cublas-cu12.2.0-x64.dll/llama.dll deps/cu12.2.0/llama.dll
cp artifacts/llava-bin-win-cublas-cu12.2.0-x64.dll/llava_shared.dll deps/cu12.2.0/llava_shared.dll
cp artifacts/ggml-bin-win-cublas-cu12.2.0-x64.dll/ggml.dll deps/cu12.2.0/ggml.dll
cp artifacts/llama-bin-win-cublas-cu12.2.0-x64.dll/llama.dll deps/cu12.2.0/llama.dll
cp artifacts/llava-bin-win-cublas-cu12.2.0-x64.dll/llava_shared.dll deps/cu12.2.0/llava_shared.dll

# Linux CUDA
cp artifacts/ggml-bin-linux-cublas-cu11.7.1-x64.so/libggml.so deps/cu11.7.1/libggml.so
cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
cp artifacts/llava-bin-linux-cublas-cu11.7.1-x64.so/libllava_shared.so deps/cu11.7.1/libllava_shared.so

cp artifacts/llama-bin-linux-cublas-cu12.2.0-x64.so/libllama.so deps/cu12.2.0/libllama.so
cp artifacts/llava-bin-linux-cublas-cu12.2.0-x64.so/libllava_shared.so deps/cu12.2.0/libllava_shared.so
cp artifacts/ggml-bin-linux-cublas-cu12.2.0-x64.so/libggml.so deps/cu12.2.0/libggml.so
cp artifacts/llama-bin-linux-cublas-cu12.2.0-x64.so/libllama.so deps/cu12.2.0/libllama.so
cp artifacts/llava-bin-linux-cublas-cu12.2.0-x64.so/libllava_shared.so deps/cu12.2.0/libllava_shared.so

cp artifacts/llama-bin-win-vulkan-x64.dll/llama.dll deps/vulkan/llama.dll
cp artifacts/llava-bin-win-vulkan-x64.dll/llava_shared.dll deps/vulkan/llava_shared.dll
# Windows Vulkan
cp artifacts/ggml-bin-win-vulkan-x64.dll/ggml.dll deps/vulkan/ggml.dll
cp artifacts/llama-bin-win-vulkan-x64.dll/llama.dll deps/vulkan/llama.dll
cp artifacts/llava-bin-win-vulkan-x64.dll/llava_shared.dll deps/vulkan/llava_shared.dll

cp artifacts/llama-bin-linux-vulkan-x64.so/libllama.so deps/vulkan/libllama.so
cp artifacts/llava-bin-linux-vulkan-x64.so/libllava_shared.so deps/vulkan/libllava_shared.so
# Linux Vulkan
cp artifacts/ggml-bin-linux-vulkan-x64.so/libggml.so deps/vulkan/libggml.so
cp artifacts/llama-bin-linux-vulkan-x64.so/libllama.so deps/vulkan/libllama.so
cp artifacts/llava-bin-linux-vulkan-x64.so/libllava_shared.so deps/vulkan/libllava_shared.so

- name: Upload artifacts
uses: actions/upload-artifact@v4
Expand All @@ -394,3 +467,4 @@ jobs:
llama-*
llava-*
*.metal
ggml-*
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -350,3 +350,5 @@ site/
/LLama.Benchmark/Models/*.gguf

**/appsettings.Local.json
/LLama/runtimes/deps
/LLama/runtimes/deps.zip
6 changes: 3 additions & 3 deletions LLama.Unittest/GrammarTest.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using LLama.Common;
using LLama.Common;
using LLama.Grammars;
using LLama.Native;

Expand Down Expand Up @@ -86,9 +86,9 @@ public async Task SampleWithTrivialGrammar()
Grammar = grammarInstance2,
};

var result = await executor.InferAsync("Q. 7 + 12\nA. ", inferenceParams).ToListAsync();
var result = string.Join("", await executor.InferAsync("Q. 7 + 12\nA. ", inferenceParams).ToListAsync());

Assert.Equal("cat", result[0]);
Assert.Equal("cat", result);
}

//this test is flakey - it reproduces an error which appears to be a bug in llama.cpp
Expand Down
38 changes: 32 additions & 6 deletions LLama.Unittest/LLama.Unittest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,39 @@
</PackageReference>
</ItemGroup>

<Target Name="DownloadContentFiles" BeforeTargets="Build">
<DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf" DestinationFolder="Models" DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf" SkipUnchangedFiles="true"></DownloadFile>

<Target Name="DownloadContentFilesInner">

<DownloadFile
SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf"
DestinationFolder="Models"
DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf"
SkipUnchangedFiles="true">
</DownloadFile>

<DownloadFile
SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf"
DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf"
SkipUnchangedFiles="true">
</DownloadFile>

<DownloadFile
SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf"
DestinationFolder="Models"
DestinationFileName="mmproj-model-f16.gguf"
SkipUnchangedFiles="true">
</DownloadFile>

<DownloadFile
SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf"
DestinationFolder="Models"
DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf"
SkipUnchangedFiles="true">
</DownloadFile>

</Target>

<Target Name="DownloadContentFiles" BeforeTargets="DispatchToInnerBuilds;BeforeBuild">
<MSBuild Projects="$(MSBuildProjectFile)" Targets="DownloadContentFilesInner" Properties="TargetFramework=once" />
</Target>

<ItemGroup>
Expand Down
4 changes: 2 additions & 2 deletions LLama.Unittest/TemplateTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ public void Clear_ResetsTemplateState()
const string userData = nameof(userData);
templater.Add("user", userData);

// Generte the template string
// Generate the template string
var dest = templater.Apply();
var templateResult = Encoding.UTF8.GetString(dest);

Expand All @@ -263,4 +263,4 @@ public void EndOSpeechToken_ReturnsExpected()
{
Assert.Equal("</s>", _model.Tokens.EndOfSpeechToken);
}
}
}
7 changes: 7 additions & 0 deletions LLama/LLamaContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ public uint BatchThreads
/// Get the maximum batch size for this context
/// </summary>
public uint BatchSize => NativeHandle.BatchSize;

/// <summary>
/// Get the special tokens for the model associated with this context
/// </summary>
public SafeLlamaModelHandle.ModelTokens Tokens { get; }

private LLamaTokenData[]? _samplingBuffer;

Expand All @@ -99,6 +104,8 @@ public LLamaContext(LLamaWeights model, IContextParams @params, ILogger? logger

@params.ToLlamaContextParams(out var lparams);
NativeHandle = SafeLLamaContextHandle.Create(model.NativeHandle, lparams);

Tokens = model.Tokens;
}

/// <summary>
Expand Down
Loading