From 3ce0f7d003642c6bfc3ca9a8b8e94f19b3225b79 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Wed, 15 Nov 2023 01:28:15 +0000 Subject: [PATCH] Using `StreamingTokenDecoder` in `LLama/LLamaExecutorBase.cs`. This should fix weird text decoding issues with multi-token characters. --- LLama/LLamaExecutorBase.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs index 578bd4d84..f047ab892 100644 --- a/LLama/LLamaExecutorBase.cs +++ b/LLama/LLamaExecutorBase.cs @@ -70,6 +70,8 @@ public abstract class StatefulExecutorBase : ILLamaExecutor /// protected float? MirostatMu { get; set; } + private StreamingTokenDecoder _decoder; + /// /// /// @@ -83,6 +85,7 @@ protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null) _consumedTokensCount = 0; _n_session_consumed = 0; _last_n_tokens = new FixedSizeQueue(Context.ContextSize).FillWith(0); + _decoder = new StreamingTokenDecoder(context); } /// @@ -294,7 +297,10 @@ public virtual async IAsyncEnumerable InferAsync(string text, IInference await InferInternal(inferenceParams, args); if (args.ReturnValue) - yield return Context.DeTokenize(_embeds); + { + _decoder.AddRange(_embeds); + yield return _decoder.Read(); + } var (breakGeneration, extraOutputs) = await PostProcess(inferenceParams, args); if (extraOutputs is { Count: > 0 })