Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import PackageDescription

let llamaVersion = "b5575"
let llamaVersion = "b5602"

// MARK: - Package Dependencies

Expand Down Expand Up @@ -94,7 +94,7 @@ packageTargets.append(contentsOf: [
name: "LocalLLMClientLlamaFramework",
url:
"https://github.com/ggml-org/llama.cpp/releases/download/\(llamaVersion)/llama-\(llamaVersion)-xcframework.zip",
checksum: "78ba1f1a8622e548f5e8a29b93473afe2e879b9b0781c3af0d31673c3310c7d6"
checksum: "88b75946b8fdedc1eb7eb97be1286d0f15c836f3676579305cf091cb977740ac"
),
.target(
name: "LocalLLMClientLlamaC",
Expand Down Expand Up @@ -159,7 +159,7 @@ packageTargets.append(contentsOf: [
],
linkerSettings: [
.unsafeFlags([
"-lggml-base", "-lggml-cpu", "-lggml-rpc", "-lggml", "-lllama", "-lmtmd"
"-lggml-base", "-lggml", "-lllama", "-lmtmd"
])
]
),
Expand Down
5 changes: 5 additions & 0 deletions Sources/LocalLLMClientLlama/Utility.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#else
@preconcurrency import LocalLLMClientLlamaC
#endif
import Foundation
#if canImport(OSLog)
import OSLog
#endif
Expand All @@ -20,6 +21,10 @@ nonisolated(unsafe) private var llamaLogCallback: ((LlamaLogLevel, String) -> Vo
public func initializeLlama() {
guard !isLlamaInitialized else { return }
isLlamaInitialized = true
#if os(Linux)
ggml_backend_load_all_from_path(ProcessInfo.processInfo.environment["LD_LIBRARY_PATH"])
#endif

llama_backend_init()

if !isCustomLogEnabled {
Expand Down
2 changes: 1 addition & 1 deletion Sources/LocalLLMClientLlamaC/exclude/llama.cpp
Submodule llama.cpp updated 81 files
+4 −4 .github/workflows/build.yml
+12 −5 .github/workflows/release.yml
+1 −1 .github/workflows/server.yml
+5 −0 CMakeLists.txt
+31 −11 README.md
+14 −1 ci/run.sh
+1 −0 common/arg.cpp
+8 −7 common/chat.cpp
+1 −1 common/chat.h
+2 −2 common/common.cpp
+2 −1 common/common.h
+6 −4 common/speculative.cpp
+1 −2 convert_hf_to_gguf.py
+4 −0 docs/build.md
+20 −16 docs/install.md
+1 −1 examples/batched.swift/Sources/main.swift
+18 −3 examples/embedding/embedding.cpp
+2 −2 examples/gritlm/gritlm.cpp
+4 −4 examples/llama.android/llama/src/main/cpp/llama-android.cpp
+4 −4 examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
+8 −6 examples/lookahead/lookahead.cpp
+1 −1 examples/lookup/lookup.cpp
+7 −5 examples/parallel/parallel.cpp
+11 −9 examples/passkey/passkey.cpp
+1 −1 examples/retrieval/retrieval.cpp
+1 −1 examples/save-load-state/save-load-state.cpp
+2 −2 examples/simple-chat/simple-chat.cpp
+1 −1 examples/speculative-simple/speculative-simple.cpp
+14 −12 examples/speculative/speculative.cpp
+1 −1 ggml/CMakeLists.txt
+0 −1 ggml/src/CMakeLists.txt
+2 −2 ggml/src/ggml-cpu/ops.cpp
+4 −1 ggml/src/ggml-cuda/fattn-mma-f16.cuh
+5 −3 ggml/src/ggml-metal/ggml-metal.m
+52 −42 ggml/src/ggml-metal/ggml-metal.metal
+6 −0 ggml/src/ggml-opencl/CMakeLists.txt
+747 −3 ggml/src/ggml-opencl/ggml-opencl.cpp
+109 −0 ggml/src/ggml-opencl/kernels/concat.cl
+30 −0 ggml/src/ggml-opencl/kernels/pad.cl
+39 −0 ggml/src/ggml-opencl/kernels/repeat.cl
+63 −0 ggml/src/ggml-opencl/kernels/tanh.cl
+48 −0 ggml/src/ggml-opencl/kernels/tsembd.cl
+121 −0 ggml/src/ggml-opencl/kernels/upscale.cl
+162 −34 ggml/src/ggml-vulkan/ggml-vulkan.cpp
+98 −0 ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp
+2 −0 ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+3 −0 gguf-py/gguf/gguf_writer.py
+124 −29 include/llama.h
+0 −1 src/CMakeLists.txt
+240 −109 src/llama-context.cpp
+8 −6 src/llama-context.h
+2 −3 src/llama-graph.cpp
+1 −1 src/llama-graph.h
+16 −16 src/llama-kv-cache-recurrent.cpp
+13 −19 src/llama-kv-cache-recurrent.h
+34 −31 src/llama-kv-cache-unified-iswa.cpp
+22 −24 src/llama-kv-cache-unified-iswa.h
+141 −86 src/llama-kv-cache-unified.cpp
+66 −37 src/llama-kv-cache-unified.h
+0 −1 src/llama-kv-cache.cpp
+0 −44 src/llama-kv-cache.h
+41 −0 src/llama-memory.cpp
+65 −23 src/llama-memory.h
+1 −1 src/llama-mmap.cpp
+42 −17 src/llama-model-loader.cpp
+38 −10 src/llama-model.cpp
+3 −0 src/llama-model.h
+5 −1 src/llama-vocab.cpp
+14 −2 tests/test-backend-ops.cpp
+1 −1 tests/test-chat.cpp
+4 −2 tools/batched-bench/batched-bench.cpp
+1 −1 tools/cvector-generator/cvector-generator.cpp
+1 −1 tools/imatrix/imatrix.cpp
+2 −2 tools/llama-bench/llama-bench.cpp
+8 −6 tools/main/main.cpp
+1 −1 tools/mtmd/mtmd-cli.cpp
+6 −6 tools/perplexity/perplexity.cpp
+2 −2 tools/run/run.cpp
+17 −15 tools/server/server.cpp
+6 −7 tools/server/tests/unit/test_tool_call.py
+10 −1 tools/server/tests/utils.py