diff --git a/LLama.Examples/Examples/QuantizeModel.cs b/LLama.Examples/Examples/QuantizeModel.cs
index a1f7ca1bd..dace956ca 100644
--- a/LLama.Examples/Examples/QuantizeModel.cs
+++ b/LLama.Examples/Examples/QuantizeModel.cs
@@ -20,6 +20,8 @@ public static async Task Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
+
+            await Task.CompletedTask;
         }
     }
 }
diff --git a/LLama/Batched/Conversation.cs b/LLama/Batched/Conversation.cs
index fcc94ae8f..c504ce07a 100644
--- a/LLama/Batched/Conversation.cs
+++ b/LLama/Batched/Conversation.cs
@@ -410,7 +410,7 @@ public void Remove(LLamaPos start, LLamaPos end)
     }
 
     /// <summary>
-    /// Removes <paramref name="count"/> tokens starting from <paramref name="start"/> 
+    /// Removes <paramref name="count"/> tokens starting from <paramref name="start"/>
    /// </summary>
     /// <param name="start">Start position (inclusive)</param>
     /// <param name="count">Number of tokens</param>
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index d1f2fb11d..408be623a 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -14,7 +14,6 @@ public class FixedSizeQueue<T>
         private readonly T[] _buffer;
         private int _start;
         private int _count;
-        private T[]? _window;
 
         // Minimum capacity for the temporary buffer used to expose a contiguous view.
         private const int MinimumWindowSize = 4;
diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index e3efb35a5..eee5ea49e 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -317,7 +317,7 @@ public virtual async IAsyncEnumerable<string> InferAsync(string? text, IInferenc
                 NeedToSaveSession = !string.IsNullOrEmpty(_pathSession) && _n_matching_session_tokens < _embed_inps.Count
             };
 
-            AntipromptProcessor.SetAntiprompts(inferenceParams.AntiPrompts ?? Array.Empty<string>());
+            AntipromptProcessor.SetAntiprompts(inferenceParams.AntiPrompts ?? []);
 
             await PreprocessInputs(text, args);
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 6617687d6..a2898c098 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -99,6 +99,7 @@ public override async Task SaveState(string filename)
                 await JsonSerializer.SerializeAsync(fs, state);
             }
         }
+
         /// <inheritdoc />
         public override async Task LoadState(string filename)
         {
@@ -154,19 +155,19 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
         }
 
         /// <inheritdoc />
-        protected override async Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
+        protected override Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embed_inps.Count <= _consumedTokensCount)
             {
                 if (!string.IsNullOrEmpty(args.LastOutput) && AntipromptProcessor.Add(args.LastOutput))
                 {
                     args.WaitForInput = true;
-                    return (true, Array.Empty<string>());
+                    return Task.FromResult<(bool, IReadOnlyList<string>)>((true, []));
                 }
 
                 if (_pastTokensCount > 0 && args.WaitForInput)
                 {
-                    return (true, new[] { "\n> " });
+                    return Task.FromResult<(bool, IReadOnlyList<string>)>((true, [ "\n> " ]));
                 }
             }
 
@@ -180,7 +181,7 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
                 args.RemainedTokens = inferenceParams.MaxTokens;
                 args.WaitForInput = true;
             }
-            return (false, Array.Empty<string>());
+            return Task.FromResult<(bool, IReadOnlyList<string>)>((false, []));
         }
 
         /// <inheritdoc />
@@ -205,7 +206,9 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, In
                 _pastTokensCount = pastTokensCount;
 
                 if (result != DecodeResult.Ok)
+                {
                     throw new LLamaDecodeError(result);
+                }
 
                 if (_embeds.Count > 0 && !string.IsNullOrEmpty(_pathSession))
                 {
@@ -250,6 +253,7 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, In
             return;
         }
+
         /// <summary>
         /// The descriptor of the state of the instruct executor.
         /// </summary>
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 1baebfa7e..c76a11215 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -21,7 +21,7 @@ namespace LLama
     public class InteractiveExecutor : StatefulExecutorBase
     {
         private bool _is_prompt_run = true;
-        
+
         // LLava
         private int _EmbedImagePosition = -1;
         private List<SafeLlavaImageEmbedHandle> _imageEmbedHandles = new List<SafeLlavaImageEmbedHandle>();
@@ -36,7 +36,7 @@ public InteractiveExecutor(LLamaContext context, ILogger? logger = null)
             : base(context, logger)
         {
         }
-        
+
         /// <summary>
         /// 
         /// </summary>
@@ -46,7 +46,7 @@ public InteractiveExecutor(LLamaContext context, ILogger? logger = null)
         public InteractiveExecutor(LLamaContext context, LLavaWeights clipModel, ILogger? logger = null)
             : base(context, clipModel, logger)
         {
-        }        
+        }
 
         /// <inheritdoc />
         public override ExecutorBaseState GetStateData()
@@ -67,6 +67,7 @@ public override ExecutorBaseState GetStateData()
             };
             return state;
         }
+
         /// <inheritdoc />
         public override Task LoadState(ExecutorBaseState data)
         {
@@ -88,23 +89,23 @@ public override Task LoadState(ExecutorBaseState data)
             return Task.CompletedTask;
         }
+
         /// <inheritdoc />
         public override async Task SaveState(string filename)
         {
             var state = (InteractiveExecutorState)GetStateData();
-            using(var fs = new FileStream(filename, FileMode.Create, FileAccess.Write))
+            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.Write))
             {
                 await JsonSerializer.SerializeAsync(fs, state);
             }
         }
+
         /// <inheritdoc />
         public override async Task LoadState(string filename)
         {
-            using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
-            {
-                var state = await JsonSerializer.DeserializeAsync<InteractiveExecutorState>(fs);
-                await LoadState(state!);
-            }
+            using var fs = new FileStream(filename, FileMode.Open, FileAccess.Read);
+            var state = await JsonSerializer.DeserializeAsync<InteractiveExecutorState>(fs);
+            await LoadState(state!);
         }
 
         /// <inheritdoc />
@@ -122,7 +123,11 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
             if (_is_prompt_run)
             {
                 // When running the first input (prompt) in interactive mode, we should specially process it.
-                if (text == null) throw new ArgumentException("Prompt cannot be null to trigger continuation if a prompt has not been provided previously.");
+                if (text == null)
+                {
+                    throw new ArgumentException("Prompt cannot be null to trigger continuation if a prompt has not been provided previously.");
+                }
+
                 if (!IsMultiModal)
                 {
                     _embed_inps = Context.Tokenize(text, true, true).ToList();
@@ -159,8 +164,8 @@ protected override Task PreprocessInputs(string? text, InferStateArgs args)
         }
 
         /// <inheritdoc />
-        private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true )
-        {            
+        private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true)
+        {
             // If the prompt contains the <image> tag extract this.
             _imageInPrompt = text.Contains("<image>");
             if (_imageInPrompt && IsMultiModal)
@@ -191,7 +196,7 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
                 {
                     var line_inp = Context.Tokenize(text, false, true);
                     _embed_inps.AddRange(line_inp);
-                    args.RemainedTokens -= line_inp.Length;                
+                    args.RemainedTokens -= line_inp.Length;
                 }
             }
             return Task.CompletedTask;
         }
@@ -203,20 +208,24 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
         /// </summary>
         /// <param name="inferenceParams"></param>
         /// <param name="args"></param>
-        protected override async Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
+        protected override Task<(bool, IReadOnlyList<string>)> PostProcess(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embed_inps.Count <= _consumedTokensCount)
             {
                 if (!string.IsNullOrEmpty(args.LastOutput) && AntipromptProcessor.Add(args.LastOutput))
+                {
                     args.WaitForInput = true;
+                }
 
                 if (_pastTokensCount > 0 && args.WaitForInput)
-                    return (true, Array.Empty<string>());
+                {
+                    return Task.FromResult((true, (IReadOnlyList<string>)[]));
+                }
             }
 
             if (_embeds.Count > 0 && _embeds.Last().IsEndOfGeneration(Context.Vocab))
             {
-                return (true, Array.Empty<string>());
+                return Task.FromResult((true, (IReadOnlyList<string>)[]));
             }
 
             if (args.RemainedTokens <= 0 && inferenceParams.MaxTokens != -1)
@@ -225,7 +234,7 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
             {
                 args.RemainedTokens = inferenceParams.MaxTokens;
                 args.WaitForInput = true;
             }
 
-            return (false, Array.Empty<string>());
+            return Task.FromResult((false, (IReadOnlyList<string>)[]));
         }
@@ -258,18 +267,18 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, In
                 // Changes to support Multi-Modal LLMs.
                 //
                 (DecodeResult, int, int) header, end, result;
-                if (IsMultiModal && _EmbedImagePosition > 0) 
+                if (IsMultiModal && _EmbedImagePosition > 0)
                 {
                     // Tokens previous to the images
                     header = await Context.DecodeAsync(_embeds.GetRange(0, _EmbedImagePosition), LLamaSeqId.Zero, batch, _pastTokensCount);
                     _pastTokensCount = header.Item3;
                     if (header.Item1 != DecodeResult.Ok) throw new LLamaDecodeError(header.Item1);
-                    
+
                     // Images
-                    foreach( var image in _imageEmbedHandles )
+                    foreach (var image in _imageEmbedHandles)
                         ClipModel!.EvalImageEmbed(Context, image, ref _pastTokensCount);
-                    
+
                     // Post-image Tokens
                     end = await Context.DecodeAsync(_embeds.GetRange(_EmbedImagePosition, _embeds.Count - _EmbedImagePosition), LLamaSeqId.Zero, batch, _pastTokensCount);
                     _pastTokensCount = end.Item3;
@@ -285,7 +294,7 @@ protected override async Task InferInternal(IInferenceParams inferenceParams, In
                     if (result.Item1 != DecodeResult.Ok) throw new LLamaDecodeError(result.Item1);
                 }
-                
+
                 if (_embeds.Count > 0 && !string.IsNullOrEmpty(_pathSession))
                 {
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index d335a1209..9c6c0349a 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -436,6 +436,7 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string k
     /// <summary>
     /// </summary>
     /// <returns></returns>
+    [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
     private static extern uint llama_model_n_cls_out(SafeLlamaModelHandle model);
 
     /// <summary>
@@ -444,6 +445,7 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string k
     /// </summary>
     /// <param name="model"></param>
     /// <param name="i"></param>
+    [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
     private static extern string? llama_model_cls_label(SafeLlamaModelHandle model, uint i);
 #endregion