diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index 578bd4d84..f047ab892 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -70,6 +70,8 @@ public abstract class StatefulExecutorBase : ILLamaExecutor
///
protected float? MirostatMu { get; set; }
+ private readonly StreamingTokenDecoder _decoder; // assigned once in the constructor; InferAsync feeds it _embeds and yields what Read() returns
+
///
///
///
@@ -83,6 +85,7 @@ protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
_consumedTokensCount = 0;
_n_session_consumed = 0;
_last_n_tokens = new FixedSizeQueue(Context.ContextSize).FillWith(0);
+ _decoder = new StreamingTokenDecoder(context);
}
///
@@ -294,7 +297,10 @@ public virtual async IAsyncEnumerable InferAsync(string text, IInference
await InferInternal(inferenceParams, args);
if (args.ReturnValue)
- yield return Context.DeTokenize(_embeds);
+ {
+ _decoder.AddRange(_embeds);
+ yield return _decoder.Read();
+ }
var (breakGeneration, extraOutputs) = await PostProcess(inferenceParams, args);
if (extraOutputs is { Count: > 0 })