From 79b5bb999823ab92016c46048ab76a302e8bf49e Mon Sep 17 00:00:00 2001
From: Michael Lamothe
Date: Sun, 5 Oct 2025 22:57:04 +1100
Subject: [PATCH 1/2] Fix some warnings.

---
 LLama.Examples/Examples/QuantizeModel.cs |  2 ++
 LLama/Batched/Conversation.cs            |  2 +-
 LLama/Common/FixedSizeQueue.cs           |  1 -
 LLama/LLamaExecutorBase.cs               |  4 ++--
 LLama/LLamaInstructExecutor.cs           |  6 +++++-
 LLama/LLamaInteractExecutor.cs           | 25 ++++++++++++++++--------
 LLama/Native/SafeLlamaModelHandle.cs     |  2 ++
 7 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/LLama.Examples/Examples/QuantizeModel.cs b/LLama.Examples/Examples/QuantizeModel.cs
index a1f7ca1bd..dace956ca 100644
--- a/LLama.Examples/Examples/QuantizeModel.cs
+++ b/LLama.Examples/Examples/QuantizeModel.cs
@@ -20,6 +20,8 @@ public static async Task Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
+
+            await Task.CompletedTask;
         }
     }
 }
diff --git a/LLama/Batched/Conversation.cs b/LLama/Batched/Conversation.cs
index fcc94ae8f..c504ce07a 100644
--- a/LLama/Batched/Conversation.cs
+++ b/LLama/Batched/Conversation.cs
@@ -410,7 +410,7 @@ public void Remove(LLamaPos start, LLamaPos end)
     }
 
     /// <summary>
-    /// Removes <paramref name="count"/> tokens starting from <paramref name="end"/>
+    /// Removes <paramref name="count"/> tokens starting from <paramref name="start"/>
    /// </summary>
     /// <param name="start">Start position (inclusive)</param>
     /// <param name="count">Number of tokens</param>
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index d1f2fb11d..408be623a 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -14,7 +14,6 @@ public class FixedSizeQueue<T>
     private readonly T[] _buffer;
     private int _start;
     private int _count;
-    private T[]? _window;
 
     // Minimum capacity for the temporary buffer used to expose a contiguous view.
     private const int MinimumWindowSize = 4;
diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index e3efb35a5..227626f78 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -262,7 +262,7 @@ protected virtual void TryReuseMatchingPrefix()
         /// <param name="inferenceParams"></param>
         /// <param name="args"></param>
         /// <returns></returns>
-        protected abstract Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args);
+        protected abstract (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args);
 
         /// <summary>
         /// The core inference logic.
@@ -338,7 +338,7 @@ public virtual async IAsyncEnumerable<string> InferAsync(string? text, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
                 yield return decoded;
             }
 
-            var (breakGeneration, extraOutputs) = await PostProcess(inferenceParams, args);
+            var (breakGeneration, extraOutputs) = PostProcess(inferenceParams, args);
             if (extraOutputs is { Count: > 0 })
             {
                 foreach (var item in extraOutputs)
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 6617687d6..9ff45c253 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -99,6 +99,7 @@ public override async Task SaveState(string filename)
                 await JsonSerializer.SerializeAsync(fs, state);
             }
         }
+
        /// <inheritdoc />
         public override async Task LoadState(string filename)
         {
@@ -154,7 +155,7 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
         }
 
         /// <inheritdoc />
-        protected override async Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
+        protected override (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embed_inps.Count <= _consumedTokensCount)
             {
@@ -205,7 +206,9 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, InferStateArgs args)
             _pastTokensCount = pastTokensCount;
 
             if (result != DecodeResult.Ok)
+            {
                 throw new LLamaDecodeError(result);
+            }
 
             if (_embeds.Count > 0 && !string.IsNullOrEmpty(_pathSession))
             {
@@ -250,6 +253,7 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, InferStateArgs args)
 
             return;
         }
+
         /// <summary>
         /// The descriptor of the state of the instruct executor.
         /// </summary>
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 1baebfa7e..fe701e8f2 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -67,6 +67,7 @@ public override ExecutorBaseState GetStateData()
             };
             return state;
         }
+
         /// <inheritdoc />
         public override Task LoadState(ExecutorBaseState data)
         {
@@ -88,23 +89,23 @@ public override Task LoadState(ExecutorBaseState data)
 
             return Task.CompletedTask;
         }
+
         /// <inheritdoc />
         public override async Task SaveState(string filename)
         {
             var state = (InteractiveExecutorState)GetStateData();
-            using(var fs = new FileStream(filename, FileMode.Create, FileAccess.Write))
+            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.Write))
             {
                 await JsonSerializer.SerializeAsync(fs, state);
             }
         }
+
         /// <inheritdoc />
         public override async Task LoadState(string filename)
         {
-            using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
-            {
-                var state = await JsonSerializer.DeserializeAsync<InteractiveExecutorState>(fs);
-                await LoadState(state!);
-            }
+            using var fs = new FileStream(filename, FileMode.Open, FileAccess.Read);
+            var state = await JsonSerializer.DeserializeAsync<InteractiveExecutorState>(fs);
+            await LoadState(state!);
         }
 
         /// <summary>
@@ -122,7 +123,11 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
             if (_is_prompt_run)
             {
                 // When running the first input (prompt) in interactive mode, we should specially process it.
-                if (text == null) throw new ArgumentException("Prompt cannot be null to trigger continuation if a prompt has not been provided previously.");
+                if (text == null)
+                {
+                    throw new ArgumentException("Prompt cannot be null to trigger continuation if a prompt has not been provided previously.");
+                }
+
                 if (!IsMultiModal)
                 {
                     _embed_inps = Context.Tokenize(text, true, true).ToList();
@@ -203,15 +208,19 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true )
         /// <param name="inferenceParams"></param>
         /// <param name="args"></param>
         /// <returns></returns>
-        protected override async Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
+        protected override (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embed_inps.Count <= _consumedTokensCount)
             {
                 if (!string.IsNullOrEmpty(args.LastOutput) && AntipromptProcessor.Add(args.LastOutput))
+                {
                     args.WaitForInput = true;
+                }
 
                 if (_pastTokensCount > 0 && args.WaitForInput)
+                {
                     return (true, Array.Empty<string>());
+                }
             }
 
             if (_embeds.Count > 0 && _embeds.Last().IsEndOfGeneration(Context.Vocab))
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index d335a1209..9c6c0349a 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -436,6 +436,7 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string key, byte* buf, long buf_size)
         /// </summary>
         /// <param name="model"></param>
         /// <returns></returns>
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         private static extern uint llama_model_n_cls_out(SafeLlamaModelHandle model);
 
         /// <summary>
@@ -444,6 +445,7 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string key, byte* buf, long buf_size)
         /// </summary>
         /// <param name="i"></param>
         /// <returns></returns>
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         private static extern string? llama_model_cls_label(SafeLlamaModelHandle model, uint i);
 #endregion

From 10486d5f66f4069df15344901df738cf7a813f35 Mon Sep 17 00:00:00 2001
From: Michael Lamothe
Date: Tue, 7 Oct 2025 23:42:21 +1100
Subject: [PATCH 2/2] Make PostProcess async again.

---
 LLama/LLamaExecutorBase.cs     |  6 +++---
 LLama/LLamaInstructExecutor.cs |  8 ++++----
 LLama/LLamaInteractExecutor.cs | 34 +++++++++++++++++-----------------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index 227626f78..eee5ea49e 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -262,7 +262,7 @@ protected virtual void TryReuseMatchingPrefix()
         /// <param name="inferenceParams"></param>
         /// <param name="args"></param>
         /// <returns></returns>
-        protected abstract (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args);
+        protected abstract Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args);
 
         /// <summary>
         /// The core inference logic.
@@ -317,7 +317,7 @@ public virtual async IAsyncEnumerable<string> InferAsync(string? text, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
                 NeedToSaveSession = !string.IsNullOrEmpty(_pathSession) && _n_matching_session_tokens < _embed_inps.Count
             };
 
-            AntipromptProcessor.SetAntiprompts(inferenceParams.AntiPrompts ?? Array.Empty<string>());
+            AntipromptProcessor.SetAntiprompts(inferenceParams.AntiPrompts ?? []);
 
             await PreprocessInputs(text, args);
 
@@ -338,7 +338,7 @@ public virtual async IAsyncEnumerable<string> InferAsync(string? text, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
                 yield return decoded;
             }
 
-            var (breakGeneration, extraOutputs) = PostProcess(inferenceParams, args);
+            var (breakGeneration, extraOutputs) = await PostProcess(inferenceParams, args);
             if (extraOutputs is { Count: > 0 })
             {
                 foreach (var item in extraOutputs)
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 9ff45c253..a2898c098 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -155,19 +155,19 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
         }
 
         /// <inheritdoc />
-        protected override (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
+        protected override Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embed_inps.Count <= _consumedTokensCount)
             {
                 if (!string.IsNullOrEmpty(args.LastOutput) && AntipromptProcessor.Add(args.LastOutput))
                 {
                     args.WaitForInput = true;
-                    return (true, Array.Empty<string>());
+                    return Task.FromResult<(bool, IReadOnlyList<string>)>((true, []));
                 }
 
                 if (_pastTokensCount > 0 && args.WaitForInput)
                 {
-                    return (true, new[] { "\n> " });
+                    return Task.FromResult<(bool, IReadOnlyList<string>)>((true, [ "\n> " ]));
                 }
             }
 
@@ -181,7 +181,7 @@ protected override (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
                 args.RemainedTokens = inferenceParams.MaxTokens;
                 args.WaitForInput = true;
             }
-            return (false, Array.Empty<string>());
+            return Task.FromResult<(bool, IReadOnlyList<string>)>((false, []));
         }
 
         /// <inheritdoc />
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index fe701e8f2..c76a11215 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -21,7 +21,7 @@ namespace LLama
     public class InteractiveExecutor : StatefulExecutorBase
     {
         private bool _is_prompt_run = true;
-        
+
         // LLava
         private int _EmbedImagePosition = -1;
         private List<SafeLlavaImageEmbedHandle> _imageEmbedHandles = new List<SafeLlavaImageEmbedHandle>();
@@ -36,16 +36,16 @@ public InteractiveExecutor(LLamaContext context, ILogger? logger = null)
             : base(context, logger)
         {
         }
-        
+
         /// <summary>
         /// </summary>
         /// <param name="context"></param>
         /// <param name="clipModel"></param>
         /// <param name="logger"></param>
         public InteractiveExecutor(LLamaContext context, LLavaWeights clipModel, ILogger? logger = null)
             : base(context, clipModel, logger)
         {
-        }        
+        }
 
         /// <inheritdoc />
         public override ExecutorBaseState GetStateData()
@@ -89,7 +89,7 @@ public override Task LoadState(ExecutorBaseState data)
 
             return Task.CompletedTask;
         }
-        
+
         /// <inheritdoc />
         public override async Task SaveState(string filename)
         {
@@ -127,7 +127,7 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
                 {
                     throw new ArgumentException("Prompt cannot be null to trigger continuation if a prompt has not been provided previously.");
                 }
-                
+
                 if (!IsMultiModal)
                 {
                     _embed_inps = Context.Tokenize(text, true, true).ToList();
@@ -164,8 +164,8 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
         }
 
         /// <inheritdoc />
-        private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true )
-        {
+        private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true)
+        {
             // If the prompt contains the <image> tag extract this.
_imageInPrompt = text.Contains(""); if (_imageInPrompt && IsMultiModal) @@ -196,7 +196,7 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru { var line_inp = Context.Tokenize(text, false, true); _embed_inps.AddRange(line_inp); - args.RemainedTokens -= line_inp.Length; + args.RemainedTokens -= line_inp.Length; } } return Task.CompletedTask; @@ -208,7 +208,7 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru /// /// /// - protected override (bool, IReadOnlyList) PostProcess(IInferenceParams inferenceParams, InferStateArgs args) + protected override Task<(bool, IReadOnlyList)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args) { if (_embed_inps.Count <= _consumedTokensCount) { @@ -219,13 +219,13 @@ protected override (bool, IReadOnlyList) PostProcess(IInferenceParams in if (_pastTokensCount > 0 && args.WaitForInput) { - return (true, Array.Empty()); + return Task.FromResult((true, (IReadOnlyList)[])); } } if (_embeds.Count > 0 && _embeds.Last().IsEndOfGeneration(Context.Vocab)) { - return (true, Array.Empty()); + return Task.FromResult((true, (IReadOnlyList)[])); } if (args.RemainedTokens <= 0 && inferenceParams.MaxTokens != -1) @@ -234,7 +234,7 @@ protected override (bool, IReadOnlyList) PostProcess(IInferenceParams in args.WaitForInput = true; } - return (false, Array.Empty()); + return Task.FromResult((true, (IReadOnlyList)[])); } /// @@ -267,18 +267,18 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, In // Changes to support Multi-Modal LLMs. // (DecodeResult, int, int) header, end, result; - if (IsMultiModal && _EmbedImagePosition > 0) + if (IsMultiModal && _EmbedImagePosition > 0) { // Tokens previous to the images header = await Context.DecodeAsync(_embeds.GetRange(0, _EmbedImagePosition), LLamaSeqId.Zero, batch, _pastTokensCount); _pastTokensCount = header.Item3; if (header.Item1 != DecodeResult.Ok) throw new LLamaDecodeError(header.Item1); - + // Images - foreach( var image in _imageEmbedHandles ) + foreach (var image in _imageEmbedHandles) ClipModel!.EvalImageEmbed(Context, image, ref _pastTokensCount); - + // Post-image Tokens end = await Context.DecodeAsync(_embeds.GetRange(_EmbedImagePosition, _embeds.Count - _EmbedImagePosition), LLamaSeqId.Zero, batch, _pastTokensCount); _pastTokensCount = end.Item3; @@ -294,7 +294,7 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, In if (result.Item1 != DecodeResult.Ok) throw new LLamaDecodeError(result.Item1); } - + if (_embeds.Count > 0 && !string.IsNullOrEmpty(_pathSession)) {