2 changes: 1 addition & 1 deletion LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
@@ -27,7 +27,7 @@
</PropertyGroup>

<ItemGroup>
-    <PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.66.240709.1" />
+    <PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.68.240716.1" />
</ItemGroup>

<ItemGroup>
20 changes: 19 additions & 1 deletion LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
@@ -1,6 +1,5 @@
using LLama;
using LLama.Common;
-using LLama.Native;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI;

@@ -112,5 +111,24 @@ public async Task<Embedding> GenerateEmbeddingAsync(string text, CancellationTok

/// <inheritdoc/>
public int CountTokens(string text) => _embedder.Context.Tokenize(text, special: true).Length;

/// <summary>
/// Get the list of tokens for the input text
/// </summary>
/// <param name="text">Input string to be tokenized</param>
/// <returns>Read-only list of tokens for the input test</returns>
/// <remarks>
/// It throws if text is null and Includes empty stop token because addBos is left true to be consistent with the CountTokens implementation.</remarks>
/// <see cref="CountTokens(string)"/>
public IReadOnlyList<string> GetTokens(string text)
{
/* see relevant unit tests for important implementation notes regarding unicode */
var context = _embedder.Context;
var numericTokens = context.Tokenize(text, special: true);
var decoder = new StreamingTokenDecoder(context);
return numericTokens
.Select(x => { decoder.Add(x); return decoder.Read(); })
.ToList();
}
}
}
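
A minimal usage sketch of the new GetTokens method, assuming a placeholder model path ("path/to/model.gguf" is illustrative, not a file from this PR); construction mirrors the unit tests added below.

using LLamaSharp.KernelMemory;

// Sketch only: the model path below is a placeholder.
var config = new LLamaSharpConfig("path/to/model.gguf");
using var embedder = new LLamaSharpTextEmbeddingGenerator(config);

var tokens = embedder.GetTokens("The quick brown fox");
var count = embedder.CountTokens("The quick brown fox");

// Both methods tokenize with special: true, so count == tokens.Count.
Console.WriteLine($"{count} tokens: {string.Join("|", tokens)}");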
18 changes: 18 additions & 0 deletions LLama.KernelMemory/LlamaSharpTextGenerator.cs
@@ -106,5 +106,23 @@ private static InferenceParams OptionsToParams(TextGenerationOptions options, In

/// <inheritdoc/>
public int CountTokens(string text) => _context.Tokenize(text, special: true).Length;

/// <summary>
/// Get the list of tokens for the input text
/// </summary>
/// <param name="text">Input string to be tokenized</param>
<returns>Read-only list of tokens for the input text</returns>
/// <remarks>
/// Throws if text is null. The result may include an empty token corresponding to BOS, because addBos is left true for consistency with the CountTokens implementation.</remarks>
/// <see cref="CountTokens(string)"/>
public IReadOnlyList<string> GetTokens(string text)
{
/* see relevant unit tests for important implementation notes regarding unicode */
var numericTokens = _context.Tokenize(text, special: true);
var decoder = new StreamingTokenDecoder(_context);
return numericTokens
.Select(x => { decoder.Add(x); return decoder.Read(); })
.ToList();
}
}
}
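
Both GetTokens implementations expand to the same tokenize-then-decode loop; below is a sketch of that loop written out in place of the LINQ Select, assuming a context built from a placeholder model path (the ModelParams/LLamaWeights setup is standard LLamaSharp usage, not part of this diff).

using System.Collections.Generic;
using LLama;
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf"); // placeholder path
using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);

var numericTokens = context.Tokenize("cat sat", special: true);
var decoder = new StreamingTokenDecoder(context);

var pieces = new List<string>(numericTokens.Length);
foreach (var token in numericTokens)
{
    decoder.Add(token);          // feed one numeric token
    pieces.Add(decoder.Read());  // drain whatever text is decodable so far
}
// Characters spanning several numeric tokens may produce empty or padded
// pieces, but concatenating the pieces round-trips the input (plus any
// BOS-related leading space); see ITextTokenizerTests below.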
117 changes: 117 additions & 0 deletions LLama.Unittest/KernelMemory/ITextTokenizerTests.cs
@@ -0,0 +1,117 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using Xunit.Abstractions;

namespace LLama.Unittest.KernelMemory
{

public abstract class ITextTokenizerTests
{
private readonly ITestOutputHelper _testOutputHelper;

#pragma warning disable KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
protected ITextTokenizer? _generator;
#pragma warning restore KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.

protected InferenceParams _infParams;
protected LLamaSharpConfig _lsConfig;

public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;

_infParams = new() { AntiPrompts = ["\n\n"] };
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams };

testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
}


[Theory]
[InlineData("The quick brown fox jumps over the lazy dog")]
[InlineData("Well, here're some special characters!!!")]
[InlineData("...___---")]
[InlineData("15 + 6 = 21 && 68 * 75 = 5100")]
[InlineData(" \n \r\n \t ")]
public void GetTokens_ShouldReturnListOfTokensForInputString(string? text)
{
var tokens = _generator!.GetTokens(text);
var tokensCount = _generator.CountTokens(text);

var expected = " " + text; // the placement of the space corresponding to BOS will vary by model tokenizer
var actual = string.Join("", tokens);

_testOutputHelper.WriteLine($"Tokens for '{text}':");
_testOutputHelper.WriteLine(string.Join("", tokens.Select(x => $"({x})")));

Assert.Equal(expected, actual);
Assert.Equal(tokensCount, tokens.Count);
}

/* This is exactly the same test as the non-unicode cases. However, there are reasons why this
* should be made a special case and may deviate in the future:
*
* As of now there appears to be no final word as to how characters that consist of more than one
* numeric token should correspond to textual tokens, and results vary according to different
* models' tokenizers. For example, given a character 'Z' that corresponds to the numeric tokens {1,2,3},
* some tokenizers (llama-2) will pad the output with spaces (i.e. ' ', ' ', 'Z')
* while others (GPT4Tokenizer) will pad with the character itself (i.e. 'Z', 'Z', 'Z').
*
* This is very evident when tokenizing ideograms and emojis, but can arise with various unicode characters
* as well. See pull request for more relevant discussion https://github.com/SciSharp/LLamaSharp/pull/862
*
* Currently the method will remain consistent with the output of ITextTokenizer.CountTokens, meaning
* any redundant tokens will not be omitted as long as they are counted by CountTokens.
*
* StreamingTokenDecoder, while sufficiently useful for this task, was not designed with producing
* output for one numeric token at a time in mind, so ITextTokenizer.GetTokens should not be considered
* an example of proper use.
*
* Note: if this message is removed, also remove references to it in LLamaSharpTextEmbeddingGenerator.GetTokens
* and LLamaSharpTextGenerator.GetTokens
*/
[Theory]
[InlineData("And a little bit of unicode για να κρατήσουμε τα πράγματα ενδιαφέροντα")]
[InlineData("猫坐在垫子上 😀🤨🤐😏")]
public void GetTokens_Unicode_ShouldReturnListOfTokensForInputString(string? text)
{
var tokens = _generator!.GetTokens(text);
var tokensCount = _generator.CountTokens(text);

var expected = " " + text; // the placement of the space corresponding to BOS will vary by model tokenizer
var actual = string.Join("", tokens);

_testOutputHelper.WriteLine($"Tokens for '{text}':");
_testOutputHelper.WriteLine(string.Join("", tokens.Select(x => $"({x})")));

Assert.Equal(expected, actual);
Assert.Equal(tokensCount, tokens.Count);
}

[Fact]
public void GetToken_ShouldThrowForNull()
{
string? text = null;

Assert.Throws<ArgumentNullException>(() => { _generator!.GetTokens(text!); });
}

[Fact]
public void GetToken_EmptyStringYieldsOneEmptyToken()
{
var text = "";
var expected = "";

var tokens = _generator!.GetTokens(text);
var tokensCount = _generator.CountTokens(text);
var actual = tokens.Single();

_testOutputHelper.WriteLine($"Tokens for '{text}':");
_testOutputHelper.WriteLine(string.Join("", tokens.Select(x => $"({x})")));

Assert.Equal(expected, actual);
Assert.Equal(tokensCount, tokens.Count);
}
}
}
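
To make the padding discussion above concrete, a hypothetical example, where generator is any of the ITextTokenizer fixtures below and 'Z' stands in for a character that maps to multiple numeric tokens (common for ideograms and emoji); actual splits and pad characters vary by model tokenizer.

var tokens = generator.GetTokens("Z");
// llama-2 style:       " ", " ", "Z"   -- extra tokens decode to spaces
// GPT4Tokenizer style: "Z", "Z", "Z"   -- extra tokens repeat the character
// Either way, tokens.Count == generator.CountTokens("Z").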
30 changes: 30 additions & 0 deletions LLama.Unittest/KernelMemory/LLamaSharpTextEmbeddingGeneratorTests.cs
@@ -0,0 +1,30 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Xunit.Abstractions;

namespace LLama.Unittest.KernelMemory
{
public class LLamaSharpTextEmbeddingGeneratorTests : ITextTokenizerTests, IDisposable
{
private readonly LLamaSharpTextEmbeddingGenerator _embeddingGenerator;

public LLamaSharpTextEmbeddingGeneratorTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper)
{
_embeddingGenerator = new LLamaSharpTextEmbeddingGenerator(_lsConfig);

_generator = _embeddingGenerator;
}

public void Dispose()
{
_embeddingGenerator.Dispose();
}
}
}
34 changes: 34 additions & 0 deletions LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs
@@ -0,0 +1,34 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection.Emit;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Xunit.Abstractions;
using Xunit.Sdk;
using static System.Net.Mime.MediaTypeNames;

namespace LLama.Unittest.KernelMemory
{
public class LlamaSharpTextGeneratorTests : ITextTokenizerTests, IDisposable
{
private readonly LlamaSharpTextGenerator _textGenerator;

public LlamaSharpTextGeneratorTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper)
{
_textGenerator = new LlamaSharpTextGenerator(_lsConfig);

_generator = _textGenerator;
}

public void Dispose()
{
_textGenerator.Dispose();
}
}
}
30 changes: 6 additions & 24 deletions LLama.Unittest/LLama.Unittest.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
<Import Project="..\LLama\LLamaSharp.Runtime.targets" />
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
@@ -29,31 +29,16 @@

<Target Name="DownloadContentFilesInner">

-  <DownloadFile
-    SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf"
-    DestinationFolder="Models"
-    DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf"
-    SkipUnchangedFiles="true">
+  <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true">
</DownloadFile>

-  <DownloadFile
-    SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf"
-    DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf"
-    SkipUnchangedFiles="true">
+  <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true">
</DownloadFile>

-  <DownloadFile
-    SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf"
-    DestinationFolder="Models"
-    DestinationFileName="mmproj-model-f16.gguf"
-    SkipUnchangedFiles="true">
+  <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true">
</DownloadFile>

-  <DownloadFile
-    SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf"
-    DestinationFolder="Models"
-    DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf"
-    SkipUnchangedFiles="true">
+  <DownloadFile SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf" DestinationFolder="Models" DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf" SkipUnchangedFiles="true">
</DownloadFile>

</Target>
@@ -63,14 +48,11 @@
</Target>

<ItemGroup>
<ProjectReference Include="..\LLama.KernelMemory\LLamaSharp.KernelMemory.csproj" />
<ProjectReference Include="..\LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj" />
<ProjectReference Include="..\LLama\LLamaSharp.csproj" />
</ItemGroup>

-  <ItemGroup>
-    <Folder Include="Models\" />
-  </ItemGroup>

<ItemGroup>
<None Update="Models\all-MiniLM-L12-v2.Q8_0.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>