Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions LLama/StreamingTokenDecoder.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
using System.Buffers;
using System.Buffers;
using System.Diagnostics;
using System;
using System.Collections.Generic;
using System.Text;
using LLama.Extensions;
using LLama.Native;

namespace LLama
Expand All @@ -23,6 +22,11 @@ public sealed class StreamingTokenDecoder
/// </summary>
public int AvailableCharacters => _characters.Count;

/// <summary>
/// If true, special tokens will be converted to text. If false they will be invisible.
/// The current value is read each time a token is added and forwarded to the token-to-bytes conversion.
/// </summary>
public bool DecodeSpecialTokens { get; set; }

#region constructors
/// <summary>
/// Create a new decoder
Expand Down Expand Up @@ -76,7 +80,7 @@ public void Add(LLamaToken token)
try
{
// Convert this token into bytes
var bytesAvailable = TokenToBytes(ref bytesArr, token, _weights).Length;
var bytesAvailable = TokenToBytes(ref bytesArr, token, _weights, DecodeSpecialTokens).Length;

// Convert those bytes into characters
var bytesOffset = 0;
Expand Down Expand Up @@ -108,10 +112,10 @@ public void Add(LLamaToken token)

// Converts a single token into bytes, using the `bytes` array as temporary storage.
// If the `bytes` array is too small it will get a larger one from the ArrayPool.
static Span<byte> TokenToBytes(ref byte[] bytes, LLamaToken token, SafeLlamaModelHandle model)
static Span<byte> TokenToBytes(ref byte[] bytes, LLamaToken token, SafeLlamaModelHandle model, bool special)
{
// Try to get bytes
var l = model.TokenToSpan(token, bytes);
var l = model.TokenToSpan(token, bytes, special);

// Check if the length was larger than the buffer. If so expand the buffer and try again
if (l > bytes.Length)
Expand Down