From 79b5bb999823ab92016c46048ab76a302e8bf49e Mon Sep 17 00:00:00 2001
From: Michael Lamothe
Date: Sun, 5 Oct 2025 22:57:04 +1100
Subject: [PATCH 1/2] Fix some warnings.

---
 LLama.Examples/Examples/QuantizeModel.cs |  2 ++
 LLama/Batched/Conversation.cs            |  2 +-
 LLama/Common/FixedSizeQueue.cs           |  1 -
 LLama/LLamaExecutorBase.cs               |  4 ++--
 LLama/LLamaInstructExecutor.cs           |  6 +++++-
 LLama/LLamaInteractExecutor.cs           | 25 ++++++++++++++++--------
 LLama/Native/SafeLlamaModelHandle.cs     |  2 ++
 7 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/LLama.Examples/Examples/QuantizeModel.cs b/LLama.Examples/Examples/QuantizeModel.cs
index a1f7ca1bd..dace956ca 100644
--- a/LLama.Examples/Examples/QuantizeModel.cs
+++ b/LLama.Examples/Examples/QuantizeModel.cs
@@ -20,6 +20,8 @@ public static async Task Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
+
+            await Task.CompletedTask;
         }
     }
 }
diff --git a/LLama/Batched/Conversation.cs b/LLama/Batched/Conversation.cs
index fcc94ae8f..c504ce07a 100644
--- a/LLama/Batched/Conversation.cs
+++ b/LLama/Batched/Conversation.cs
@@ -410,7 +410,7 @@ public void Remove(LLamaPos start, LLamaPos end)
     }
 
     /// <summary>
-    /// Removes <paramref name="count"/> tokens starting from <paramref name="end"/>
+    /// Removes <paramref name="count"/> tokens starting from <paramref name="start"/>
    /// </summary>
     /// <param name="start">Start position (inclusive)</param>
     /// <param name="count">Number of tokens</param>
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index d1f2fb11d..408be623a 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -14,7 +14,6 @@ public class FixedSizeQueue<T>
     private readonly T[] _buffer;
     private int _start;
     private int _count;
-    private T[]? _window;
 
     // Minimum capacity for the temporary buffer used to expose a contiguous view.
     private const int MinimumWindowSize = 4;
diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index e3efb35a5..227626f78 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -262,7 +262,7 @@ protected virtual void TryReuseMatchingPrefix()
         /// <param name="inferenceParams"></param>
         /// <param name="args"></param>
         /// <returns></returns>
-        protected abstract Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args);
+        protected abstract (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args);
 
         /// <summary>
         /// The core inference logic.
@@ -338,7 +338,7 @@ public virtual async IAsyncEnumerable<string> InferAsync(string? text, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
                 yield return decoded;
             }
 
-            var (breakGeneration, extraOutputs) = await PostProcess(inferenceParams, args);
+            var (breakGeneration, extraOutputs) = PostProcess(inferenceParams, args);
             if (extraOutputs is { Count: > 0 })
             {
                 foreach (var item in extraOutputs)
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 6617687d6..9ff45c253 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -99,6 +99,7 @@ public override async Task SaveState(string filename)
                 await JsonSerializer.SerializeAsync(fs, state);
             }
         }
+
        /// <inheritdoc />
         public override async Task LoadState(string filename)
         {
@@ -154,7 +155,7 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
         }
 
         /// <inheritdoc />
-        protected override async Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
+        protected override (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embed_inps.Count <= _consumedTokensCount)
             {
@@ -205,7 +206,9 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, InferStateArgs args)
             _pastTokensCount = pastTokensCount;
 
             if (result != DecodeResult.Ok)
+            {
                 throw new LLamaDecodeError(result);
+            }
 
             if (_embeds.Count > 0 && !string.IsNullOrEmpty(_pathSession))
             {
@@ -250,6 +253,7 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, InferStateArgs args)
 
             return;
         }
+
         /// <summary>
         /// The descriptor of the state of the instruct executor.
         /// </summary>
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 1baebfa7e..fe701e8f2 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -67,6 +67,7 @@ public override ExecutorBaseState GetStateData()
             };
             return state;
         }
+
         /// <inheritdoc />
         public override Task LoadState(ExecutorBaseState data)
         {
@@ -88,23 +89,23 @@ public override Task LoadState(ExecutorBaseState data)
 
             return Task.CompletedTask;
         }
+
         /// <inheritdoc />
         public override async Task SaveState(string filename)
         {
             var state = (InteractiveExecutorState)GetStateData();
-            using(var fs = new FileStream(filename, FileMode.Create, FileAccess.Write))
+            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.Write))
             {
                 await JsonSerializer.SerializeAsync(fs, state);
             }
         }
+
         /// <inheritdoc />
         public override async Task LoadState(string filename)
         {
-            using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
-            {
-                var state = await JsonSerializer.DeserializeAsync<InteractiveExecutorState>(fs);
-                await LoadState(state!);
-            }
+            using var fs = new FileStream(filename, FileMode.Open, FileAccess.Read);
+            var state = await JsonSerializer.DeserializeAsync<InteractiveExecutorState>(fs);
+            await LoadState(state!);
         }
 
         /// <summary>
@@ -122,7 +123,11 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
             if (_is_prompt_run)
             {
                 // When running the first input (prompt) in interactive mode, we should specially process it.
-                if (text == null) throw new ArgumentException("Prompt cannot be null to trigger continuation if a prompt has not been provided previously.");
+                if (text == null)
+                {
+                    throw new ArgumentException("Prompt cannot be null to trigger continuation if a prompt has not been provided previously.");
+                }
+
                 if (!IsMultiModal)
                 {
                     _embed_inps = Context.Tokenize(text, true, true).ToList();
@@ -203,15 +208,19 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true )
         /// <param name="inferenceParams"></param>
         /// <param name="args"></param>
         /// <returns></returns>
-        protected override async Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
+        protected override (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embed_inps.Count <= _consumedTokensCount)
             {
                 if (!string.IsNullOrEmpty(args.LastOutput) && AntipromptProcessor.Add(args.LastOutput))
+                {
                     args.WaitForInput = true;
+                }
 
                 if (_pastTokensCount > 0 && args.WaitForInput)
+                {
                     return (true, Array.Empty<string>());
+                }
             }
 
             if (_embeds.Count > 0 && _embeds.Last().IsEndOfGeneration(Context.Vocab))
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index d335a1209..9c6c0349a 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -436,6 +436,7 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string key, byte* buf, long buf_size)
         /// </summary>
         /// <param name="model"></param>
         /// <returns></returns>
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         private static extern uint llama_model_n_cls_out(SafeLlamaModelHandle model);
 
         /// <summary>
@@ -444,6 +445,7 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string key, byte* buf, long buf_size)
         /// </summary>
         /// <param name="i"></param>
         /// <returns></returns>
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         private static extern string? llama_model_cls_label(SafeLlamaModelHandle model, uint i);
 #endregion

From 10486d5f66f4069df15344901df738cf7a813f35 Mon Sep 17 00:00:00 2001
From: Michael Lamothe
Date: Tue, 7 Oct 2025 23:42:21 +1100
Subject: [PATCH 2/2] Make PostProcess async again.

---
 LLama/LLamaExecutorBase.cs     |  6 +++---
 LLama/LLamaInstructExecutor.cs |  8 ++++----
 LLama/LLamaInteractExecutor.cs | 34 +++++++++++++++++-----------------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index 227626f78..eee5ea49e 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -262,7 +262,7 @@ protected virtual void TryReuseMatchingPrefix()
         /// <param name="inferenceParams"></param>
         /// <param name="args"></param>
         /// <returns></returns>
-        protected abstract (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args);
+        protected abstract Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args);
 
         /// <summary>
         /// The core inference logic.
@@ -317,7 +317,7 @@ public virtual async IAsyncEnumerable<string> InferAsync(string? text, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
                 NeedToSaveSession = !string.IsNullOrEmpty(_pathSession) && _n_matching_session_tokens < _embed_inps.Count
             };
 
-            AntipromptProcessor.SetAntiprompts(inferenceParams.AntiPrompts ?? Array.Empty<string>());
+            AntipromptProcessor.SetAntiprompts(inferenceParams.AntiPrompts ?? []);
 
             await PreprocessInputs(text, args);
 
@@ -338,7 +338,7 @@ public virtual async IAsyncEnumerable<string> InferAsync(string? text, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
                 yield return decoded;
             }
 
-            var (breakGeneration, extraOutputs) = PostProcess(inferenceParams, args);
+            var (breakGeneration, extraOutputs) = await PostProcess(inferenceParams, args);
             if (extraOutputs is { Count: > 0 })
             {
                 foreach (var item in extraOutputs)
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 9ff45c253..a2898c098 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -155,19 +155,19 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
         }
 
         /// <inheritdoc />
-        protected override (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
+        protected override Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embed_inps.Count <= _consumedTokensCount)
             {
                 if (!string.IsNullOrEmpty(args.LastOutput) && AntipromptProcessor.Add(args.LastOutput))
                 {
                     args.WaitForInput = true;
-                    return (true, Array.Empty<string>());
+                    return Task.FromResult<(bool, IReadOnlyList<string>)>((true, []));
                 }
 
                 if (_pastTokensCount > 0 && args.WaitForInput)
                 {
-                    return (true, new[] { "\n> " });
+                    return Task.FromResult<(bool, IReadOnlyList<string>)>((true, [ "\n> " ]));
                 }
             }
 
@@ -181,7 +181,7 @@ protected override (bool, IReadOnlyList<string>) PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
                 args.RemainedTokens = inferenceParams.MaxTokens;
                 args.WaitForInput = true;
             }
-            return (false, Array.Empty<string>());
+            return Task.FromResult<(bool, IReadOnlyList<string>)>((false, []));
         }
 
         /// <inheritdoc />
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index fe701e8f2..c76a11215 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -21,7 +21,7 @@ namespace LLama
     public class InteractiveExecutor : StatefulExecutorBase
     {
         private bool _is_prompt_run = true;
-        
+
         // LLava
         private int _EmbedImagePosition = -1;
         private List<SafeLlavaImageEmbedHandle> _imageEmbedHandles = new List<SafeLlavaImageEmbedHandle>();
@@ -36,16 +36,16 @@ public InteractiveExecutor(LLamaContext context, ILogger? logger = null)
             : base(context, logger)
         {
         }
-        
+
         /// <summary>
         /// </summary>
         /// <param name="context"></param>
         /// <param name="clipModel"></param>
         /// <param name="logger"></param>
         public InteractiveExecutor(LLamaContext context, LLavaWeights clipModel, ILogger? logger = null)
             : base(context, clipModel, logger)
         {
-        }        
+        }
 
         /// <inheritdoc />
         public override ExecutorBaseState GetStateData()
@@ -89,7 +89,7 @@ public override Task LoadState(ExecutorBaseState data)
 
             return Task.CompletedTask;
         }
-        
+
         /// <inheritdoc />
         public override async Task SaveState(string filename)
         {
@@ -127,7 +127,7 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
                 {
                     throw new ArgumentException("Prompt cannot be null to trigger continuation if a prompt has not been provided previously.");
                 }
-                
+
                 if (!IsMultiModal)
                 {
                     _embed_inps = Context.Tokenize(text, true, true).ToList();
@@ -164,8 +164,8 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
         }
 
         /// <inheritdoc />
-        private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true )
-        {
+        private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true)
+        {
             // If the prompt contains the <image> tag extract this.
_imageInPrompt = text.Contains(""); if (_imageInPrompt && IsMultiModal) @@ -196,7 +196,7 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru { var line_inp = Context.Tokenize(text, false, true); _embed_inps.AddRange(line_inp); - args.RemainedTokens -= line_inp.Length; + args.RemainedTokens -= line_inp.Length; } } return Task.CompletedTask; @@ -208,7 +208,7 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru /// /// /// - protected override (bool, IReadOnlyList) PostProcess(IInferenceParams inferenceParams, InferStateArgs args) + protected override Task<(bool, IReadOnlyList)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args) { if (_embed_inps.Count <= _consumedTokensCount) { @@ -219,13 +219,13 @@ protected override (bool, IReadOnlyList) PostProcess(IInferenceParams in if (_pastTokensCount > 0 && args.WaitForInput) { - return (true, Array.Empty()); + return Task.FromResult((true, (IReadOnlyList)[])); } } if (_embeds.Count > 0 && _embeds.Last().IsEndOfGeneration(Context.Vocab)) { - return (true, Array.Empty()); + return Task.FromResult((true, (IReadOnlyList)[])); } if (args.RemainedTokens <= 0 && inferenceParams.MaxTokens != -1) @@ -234,7 +234,7 @@ protected override (bool, IReadOnlyList) PostProcess(IInferenceParams in args.WaitForInput = true; } - return (false, Array.Empty()); + return Task.FromResult((true, (IReadOnlyList)[])); } /// @@ -267,18 +267,18 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, In // Changes to support Multi-Modal LLMs. // (DecodeResult, int, int) header, end, result; - if (IsMultiModal && _EmbedImagePosition > 0) + if (IsMultiModal && _EmbedImagePosition > 0) { // Tokens previous to the images header = await Context.DecodeAsync(_embeds.GetRange(0, _EmbedImagePosition), LLamaSeqId.Zero, batch, _pastTokensCount); _pastTokensCount = header.Item3; if (header.Item1 != DecodeResult.Ok) throw new LLamaDecodeError(header.Item1); - + // Images - foreach( var image in _imageEmbedHandles ) + foreach (var image in _imageEmbedHandles) ClipModel!.EvalImageEmbed(Context, image, ref _pastTokensCount); - + // Post-image Tokens end = await Context.DecodeAsync(_embeds.GetRange(_EmbedImagePosition, _embeds.Count - _EmbedImagePosition), LLamaSeqId.Zero, batch, _pastTokensCount); _pastTokensCount = end.Item3; @@ -294,7 +294,7 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, In if (result.Item1 != DecodeResult.Ok) throw new LLamaDecodeError(result.Item1); } - + if (_embeds.Count > 0 && !string.IsNullOrEmpty(_pathSession)) {