Skip to content

Commit 6a92faf

Browse files
authored
Merge pull request #1126 from martindevans/binary_update_march_2025
March 2025 Update
2 parents b4d0db8 + 8efade0 commit 6a92faf

17 files changed

+329
-88
lines changed

.github/workflows/compile.yml

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,72 @@ jobs:
7676
name: llava-bin-linux-${{ matrix.build }}-x64.so
7777
if-no-files-found: error
7878

79+
compile-musl:
80+
name: Compile (musl)
81+
strategy:
82+
fail-fast: true
83+
matrix:
84+
include:
85+
- build: 'noavx'
86+
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
87+
- build: 'avx2'
88+
defines: ''
89+
- build: 'avx'
90+
defines: '-DGGML_AVX2=OFF'
91+
- build: 'avx512'
92+
defines: '-DGGML_AVX512=ON'
93+
runs-on: ubuntu-20.04
94+
container:
95+
image: alpine:latest
96+
steps:
97+
- name: Install dependencies
98+
run: |
99+
apk update && apk add --no-cache \
100+
build-base \
101+
cmake \
102+
git \
103+
linux-headers \
104+
g++
105+
- uses: actions/checkout@v4
106+
with:
107+
repository: ggerganov/llama.cpp
108+
fetch-depth: 0
109+
ref: '${{ github.event.inputs.llama_cpp_commit }}'
110+
- name: Build
111+
id: cmake_build_musl
112+
run: |
113+
mkdir build
114+
cd build
115+
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
116+
cmake --build . --config Release -j "$(nproc)"  # nproc gives the CPU count in the Alpine sh container; the PowerShell env: form does not
117+
ls -R
118+
- uses: actions/upload-artifact@v4
119+
with:
120+
path: ./build/bin/libllama.so
121+
name: llama-bin-musl-${{ matrix.build }}-x64.so
122+
if-no-files-found: error
123+
- uses: actions/upload-artifact@v4
124+
with:
125+
path: ./build/bin/libggml.so
126+
name: ggml-bin-musl-${{ matrix.build }}-x64.so
127+
if-no-files-found: error
128+
- uses: actions/upload-artifact@v4
129+
with:
130+
path: ./build/bin/libggml-base.so
131+
name: ggml-base-bin-musl-${{ matrix.build }}-x64.so
132+
if-no-files-found: error
133+
- uses: actions/upload-artifact@v4
134+
with:
135+
path: ./build/bin/libggml-cpu.so
136+
name: ggml-cpu-bin-musl-${{ matrix.build }}-x64.so
137+
if-no-files-found: error
138+
- name: Upload Llava
139+
uses: actions/upload-artifact@v4
140+
with:
141+
path: ./build/bin/libllava_shared.so
142+
name: llava-bin-musl-${{ matrix.build }}-x64.so
143+
if-no-files-found: error
144+
79145
compile-windows:
80146
name: Compile (Windows)
81147
strategy:
@@ -519,6 +585,7 @@ jobs:
519585
if: ${{ always() }}
520586
needs: [
521587
"compile-linux",
588+
"compile-musl",
522589
"compile-windows",
523590
"compile-vulkan",
524591
"compile-cublas",
@@ -534,7 +601,7 @@ jobs:
534601
- name: Rearrange Files
535602
run: |
536603
# Make all directories at once
537-
mkdir --parents deps/{noavx,avx,avx2,avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
604+
mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
538605
539606
# Linux
540607
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
@@ -561,6 +628,31 @@ jobs:
561628
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
562629
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so
563630
631+
# Musl
632+
cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
633+
cp artifacts/ggml-base-bin-musl-noavx-x64.so/libggml-base.so deps/musl-noavx/libggml-base.so
634+
cp artifacts/ggml-cpu-bin-musl-noavx-x64.so/libggml-cpu.so deps/musl-noavx/libggml-cpu.so
635+
cp artifacts/llama-bin-musl-noavx-x64.so/libllama.so deps/musl-noavx/libllama.so
636+
cp artifacts/llava-bin-musl-noavx-x64.so/libllava_shared.so deps/musl-noavx/libllava_shared.so
637+
638+
cp artifacts/ggml-bin-musl-avx-x64.so/libggml.so deps/musl-avx/libggml.so
639+
cp artifacts/ggml-base-bin-musl-avx-x64.so/libggml-base.so deps/musl-avx/libggml-base.so
640+
cp artifacts/ggml-cpu-bin-musl-avx-x64.so/libggml-cpu.so deps/musl-avx/libggml-cpu.so
641+
cp artifacts/llama-bin-musl-avx-x64.so/libllama.so deps/musl-avx/libllama.so
642+
cp artifacts/llava-bin-musl-avx-x64.so/libllava_shared.so deps/musl-avx/libllava_shared.so
643+
644+
cp artifacts/ggml-bin-musl-avx2-x64.so/libggml.so deps/musl-avx2/libggml.so
645+
cp artifacts/ggml-base-bin-musl-avx2-x64.so/libggml-base.so deps/musl-avx2/libggml-base.so
646+
cp artifacts/ggml-cpu-bin-musl-avx2-x64.so/libggml-cpu.so deps/musl-avx2/libggml-cpu.so
647+
cp artifacts/llama-bin-musl-avx2-x64.so/libllama.so deps/musl-avx2/libllama.so
648+
cp artifacts/llava-bin-musl-avx2-x64.so/libllava_shared.so deps/musl-avx2/libllava_shared.so
649+
650+
cp artifacts/ggml-bin-musl-avx512-x64.so/libggml.so deps/musl-avx512/libggml.so
651+
cp artifacts/ggml-base-bin-musl-avx512-x64.so/libggml-base.so deps/musl-avx512/libggml-base.so
652+
cp artifacts/ggml-cpu-bin-musl-avx512-x64.so/libggml-cpu.so deps/musl-avx512/libggml-cpu.so
653+
cp artifacts/llama-bin-musl-avx512-x64.so/libllama.so deps/musl-avx512/libllama.so
654+
cp artifacts/llava-bin-musl-avx512-x64.so/libllava_shared.so deps/musl-avx512/libllava_shared.so
655+
564656
# Windows
565657
cp artifacts/ggml-bin-win-noavx-x64.dll/ggml.dll deps/noavx/ggml.dll
566658
cp artifacts/ggml-base-bin-win-noavx-x64.dll/ggml-base.dll deps/noavx/ggml-base.dll

LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
3333
{
3434
ContextSize = config.ContextSize,
3535
GpuLayerCount = config.GpuLayerCount ?? 20,
36-
Embeddings = true,
36+
3737
PoolingType = LLamaPoolingType.Mean,
3838
};
39+
3940
_weights = LLamaWeights.LoadFromFile(@params);
4041
_embedder = new LLamaEmbedder(_weights, @params);
4142
_ownsWeights = true;

LLama.Unittest/KernelMemory/ITextTokenizerTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
2222
_testOutputHelper = testOutputHelper;
2323

2424
_infParams = new() { AntiPrompts = ["\n\n"] };
25-
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams };
25+
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams, ContextSize = 512 };
2626

2727
testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
2828
}

LLama.Unittest/LLamaContextTests.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ public LLamaContextTests()
1414
var @params = new ModelParams(Constants.GenerativeModelPath2)
1515
{
1616
ContextSize = 128,
17+
BatchSize = 8,
18+
UBatchSize = 8,
19+
SeqMax = 1,
20+
VocabOnly = false,
1721
GpuLayerCount = Constants.CIGpuLayerCount,
1822
};
1923
_weights = LLamaWeights.LoadFromFile(@params);
@@ -84,6 +88,11 @@ public void TokenizeEmpty()
8488
[Fact]
8589
public void SaveLoadState()
8690
{
91+
// Make sure there's something in the context worth saving
92+
var batch = new LLamaBatch();
93+
batch.Add(17, 0, LLamaSeqId.Zero, true);
94+
_context.Decode(batch);
95+
8796
using var state1 = _context.GetState();
8897

8998
var stream = new MemoryStream();
@@ -99,6 +108,11 @@ public void SaveLoadState()
99108
[Fact]
100109
public async Task SaveLoadStateAsync()
101110
{
111+
// Make sure there's something in the context worth saving
112+
var batch = new LLamaBatch();
113+
batch.Add(17, 0, LLamaSeqId.Zero, true);
114+
_context.Decode(batch);
115+
102116
using var state1 = _context.GetState();
103117

104118
var stream = new MemoryStream();

LLama/Batched/Conversation.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public Conversation Fork()
129129
_forked = true;
130130

131131
// Assign tokens to the new sequence
132-
NativeApi.llama_kv_cache_seq_cp(Executor.Context.NativeHandle, ConversationId, c.ConversationId, 0, _end);
132+
Executor.Context.NativeHandle.KvCacheSequenceCopy(ConversationId, c.ConversationId, 0, _end);
133133

134134
return c;
135135
}

LLama/LLamaExecutorBase.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,8 @@ protected virtual void HandleRunOutOfContext(int tokensToKeep)
193193
int n_left = _pastTokensCount - tokensToKeep;
194194
int n_discard = n_left / 2;
195195

196-
NativeApi.llama_kv_cache_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep, tokensToKeep + n_discard);
197-
NativeApi.llama_kv_cache_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep + n_discard, _pastTokensCount, -n_discard);
196+
NativeApi.llama_kv_self_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep, tokensToKeep + n_discard);
197+
NativeApi.llama_kv_self_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep + n_discard, _pastTokensCount, -n_discard);
198198

199199
_pastTokensCount -= n_discard;
200200
// stop saving session if we run out of context

LLama/LLamaSharp.Runtime.targets

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@
200200
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
201201
<Link>runtimes/linux-x64/native/avx512/libggml-cpu.so</Link>
202202
</None>
203+
203204

204205
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu11.7.1/libllama.so">
205206
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
@@ -253,6 +254,75 @@
253254
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
254255
<Link>runtimes/linux-x64/native/vulkan/libggml-vulkan.so</Link>
255256
</None>
257+
258+
259+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libllama.so">
260+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
261+
<Link>runtimes/linux-musl-x64/native/noavx/libllama.so</Link>
262+
</None>
263+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml.so">
264+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
265+
<Link>runtimes/linux-musl-x64/native/noavx/libggml.so</Link>
266+
</None>
267+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml-base.so">
268+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
269+
<Link>runtimes/linux-musl-x64/native/noavx/libggml-base.so</Link>
270+
</None>
271+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml-cpu.so">
272+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
273+
<Link>runtimes/linux-musl-x64/native/noavx/libggml-cpu.so</Link>
274+
</None>
275+
276+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libllama.so">
277+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
278+
<Link>runtimes/linux-musl-x64/native/avx/libllama.so</Link>
279+
</None>
280+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml.so">
281+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
282+
<Link>runtimes/linux-musl-x64/native/avx/libggml.so</Link>
283+
</None>
284+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml-base.so">
285+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
286+
<Link>runtimes/linux-musl-x64/native/avx/libggml-base.so</Link>
287+
</None>
288+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml-cpu.so">
289+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
290+
<Link>runtimes/linux-musl-x64/native/avx/libggml-cpu.so</Link>
291+
</None>
292+
293+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libllama.so">
294+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
295+
<Link>runtimes/linux-musl-x64/native/avx2/libllama.so</Link>
296+
</None>
297+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml.so">
298+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
299+
<Link>runtimes/linux-musl-x64/native/avx2/libggml.so</Link>
300+
</None>
301+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml-base.so">
302+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
303+
<Link>runtimes/linux-musl-x64/native/avx2/libggml-base.so</Link>
304+
</None>
305+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml-cpu.so">
306+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
307+
<Link>runtimes/linux-musl-x64/native/avx2/libggml-cpu.so</Link>
308+
</None>
309+
310+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libllama.so">
311+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
312+
<Link>runtimes/linux-musl-x64/native/avx512/libllama.so</Link>
313+
</None>
314+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml.so">
315+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
316+
<Link>runtimes/linux-musl-x64/native/avx512/libggml.so</Link>
317+
</None>
318+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml-base.so">
319+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
320+
<Link>runtimes/linux-musl-x64/native/avx512/libggml-base.so</Link>
321+
</None>
322+
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml-cpu.so">
323+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
324+
<Link>runtimes/linux-musl-x64/native/avx512/libggml-cpu.so</Link>
325+
</None>
256326

257327

258328
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-arm64/libggml-base.dylib">

LLama/LLamaSharp.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
</ItemGroup>
5858

5959
<PropertyGroup>
60-
<BinaryReleaseId>5783575c9d99</BinaryReleaseId>
60+
<BinaryReleaseId>be7c3034108473be</BinaryReleaseId>
6161
</PropertyGroup>
6262

6363
<PropertyGroup>

LLama/LLamaStatelessExecutor.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ public async IAsyncEnumerable<string> InferAsync(string prompt, IInferenceParams
155155
var n_left = n_past - tokensKeep;
156156
var n_discard = n_left / 2;
157157

158-
NativeApi.llama_kv_cache_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep , tokensKeep + n_discard);
159-
NativeApi.llama_kv_cache_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep + n_discard, n_past, -n_discard);
158+
NativeApi.llama_kv_self_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep , tokensKeep + n_discard);
159+
NativeApi.llama_kv_self_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep + n_discard, n_past, -n_discard);
160160

161161
n_past -= n_discard;
162162
}

LLama/Native/LLamaKvCache.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
namespace LLama.Native;
2+
3+
/// <summary>
4+
/// C# representation of the native llama_kv_cache type (currently an empty placeholder that declares no members)
5+
/// </summary>
6+
/// <remarks>Native counterpart: llama_kv_cache. No fields are declared on the managed side.</remarks>
7+
internal struct LLamaKvCacheNative
8+
{
9+
10+
}

0 commit comments

Comments
 (0)