Skip to content

Commit f93c418

Browse files
Remove the abstraction for token counting from the main evaluation API
Introduce a standalone extension method (that is decoupled from the rest of the evaluation API) that can be used for token counting instead. This change is being made because there is still some uncertainty around what a general-purpose token counting abstraction (that supports all kinds of future models, and all kinds of input modalities) should look like at the moment. We do not want to bake in an API that only supports text-based inputs for the models and use cases that are prevalent today, since it would be a breaking change to change this API once we release a stable version of the evaluation APIs. We can always reintroduce the token counting support if and when there is more clarity on what a general-purpose token counting abstraction should look like, or if and when a general-purpose token counting abstraction is introduced in a lower layer (Microsoft.Extensions.AI) in the future. In the meantime, the extension method, even though less than ideal, should still allow callers to trim down the evaluated conversation history in cases where a token limit needs to be enforced.
1 parent 6abae3c commit f93c418

File tree

22 files changed

+371
-307
lines changed

22 files changed

+371
-307
lines changed

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs

Lines changed: 2 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -57,99 +57,6 @@ public virtual async ValueTask<EvaluationResult> EvaluateAsync(
5757

5858
(ChatMessage? userRequest, List<ChatMessage> history) = GetUserRequestAndHistory(messages);
5959

60-
int inputTokenLimit = 0;
61-
int ignoredMessagesCount = 0;
62-
63-
if (chatConfiguration.TokenCounter is not null)
64-
{
65-
IEvaluationTokenCounter tokenCounter = chatConfiguration.TokenCounter;
66-
inputTokenLimit = tokenCounter.InputTokenLimit;
67-
int tokenBudget = inputTokenLimit;
68-
69-
void OnTokenBudgetExceeded()
70-
{
71-
EvaluationDiagnostic tokenBudgetExceeded =
72-
EvaluationDiagnostic.Error(
73-
$"Evaluation failed because the specified limit of {inputTokenLimit} input tokens was exceeded.");
74-
75-
result.AddDiagnosticsToAllMetrics(tokenBudgetExceeded);
76-
}
77-
78-
if (!string.IsNullOrWhiteSpace(SystemPrompt))
79-
{
80-
tokenBudget -= tokenCounter.CountTokens(SystemPrompt!);
81-
if (tokenBudget < 0)
82-
{
83-
OnTokenBudgetExceeded();
84-
return result;
85-
}
86-
}
87-
88-
string baseEvaluationPrompt =
89-
await RenderEvaluationPromptAsync(
90-
userRequest,
91-
modelResponse,
92-
includedHistory: [],
93-
additionalContext,
94-
cancellationToken).ConfigureAwait(false);
95-
96-
tokenBudget -= tokenCounter.CountTokens(baseEvaluationPrompt);
97-
if (tokenBudget < 0)
98-
{
99-
OnTokenBudgetExceeded();
100-
return result;
101-
}
102-
103-
if (history.Count > 0 && !IgnoresHistory)
104-
{
105-
if (history.Count == 1)
106-
{
107-
(bool canRender, tokenBudget) =
108-
await CanRenderAsync(
109-
history[0],
110-
tokenBudget,
111-
chatConfiguration,
112-
cancellationToken).ConfigureAwait(false);
113-
114-
if (!canRender)
115-
{
116-
ignoredMessagesCount = 1;
117-
history = [];
118-
}
119-
}
120-
else
121-
{
122-
int totalMessagesCount = history.Count;
123-
int includedMessagesCount = 0;
124-
125-
history.Reverse();
126-
127-
foreach (ChatMessage message in history)
128-
{
129-
cancellationToken.ThrowIfCancellationRequested();
130-
131-
(bool canRender, tokenBudget) =
132-
await CanRenderAsync(
133-
message,
134-
tokenBudget,
135-
chatConfiguration,
136-
cancellationToken).ConfigureAwait(false);
137-
138-
if (!canRender)
139-
{
140-
ignoredMessagesCount = totalMessagesCount - includedMessagesCount;
141-
history.RemoveRange(index: includedMessagesCount, count: ignoredMessagesCount);
142-
break;
143-
}
144-
145-
includedMessagesCount++;
146-
}
147-
148-
history.Reverse();
149-
}
150-
}
151-
}
152-
15360
var evaluationMessages = new List<ChatMessage>();
15461
if (!string.IsNullOrWhiteSpace(SystemPrompt))
15562
{
@@ -172,84 +79,9 @@ await PerformEvaluationAsync(
17279
result,
17380
cancellationToken).ConfigureAwait(false);
17481

175-
if (inputTokenLimit > 0 && ignoredMessagesCount > 0)
176-
{
177-
#pragma warning disable S103 // Lines should not be too long
178-
result.AddDiagnosticsToAllMetrics(
179-
EvaluationDiagnostic.Warning(
180-
$"The evaluation may be inconclusive because the oldest {ignoredMessagesCount} messages in the supplied conversation history were ignored in order to stay under the specified limit of {inputTokenLimit} input tokens."));
181-
#pragma warning restore S103
182-
}
183-
18482
return result;
18583
}
18684

187-
/// <summary>
188-
/// Determines if there is sufficient <paramref name="tokenBudget"/> remaining to render the
189-
/// supplied <paramref name="message"/> as part of the evaluation prompt that this <see cref="IEvaluator"/> uses.
190-
/// </summary>
191-
/// <param name="message">
192-
/// A message that is part of the conversation history for the response being evaluated and that is to be rendered
193-
/// as part of the evaluation prompt.
194-
/// </param>
195-
/// <param name="tokenBudget">
196-
/// The number of tokens available for rendering additional content as part of the evaluation prompt.
197-
/// </param>
198-
/// <param name="chatConfiguration">
199-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
200-
/// <see cref="IEvaluationTokenCounter"/> that this <see cref="IEvaluator"/> uses to perform the evaluation.
201-
/// </param>
202-
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can cancel the operation.</param>
203-
/// <returns>
204-
/// A tuple containing a <see langword="bool"/> indicating whether there is sufficient
205-
/// <paramref name="tokenBudget"/> remaining to render the supplied <paramref name="message"/> as part of the
206-
/// evaluation prompt, and an <see langword="int"/> containing the remaining token budget that would be available
207-
/// once this <paramref name="message"/> is rendered.
208-
/// </returns>
209-
protected virtual ValueTask<(bool canRender, int remainingTokenBudget)> CanRenderAsync(
210-
ChatMessage message,
211-
int tokenBudget,
212-
ChatConfiguration chatConfiguration,
213-
CancellationToken cancellationToken)
214-
{
215-
_ = Throw.IfNull(message);
216-
_ = Throw.IfNull(chatConfiguration);
217-
218-
IEvaluationTokenCounter? tokenCounter = chatConfiguration.TokenCounter;
219-
if (tokenCounter is null)
220-
{
221-
return new ValueTask<(bool, int)>((true, tokenBudget));
222-
}
223-
224-
string? author = message.AuthorName;
225-
string role = message.Role.Value;
226-
string content = message.Text ?? string.Empty;
227-
228-
int tokenCount =
229-
string.IsNullOrWhiteSpace(author)
230-
? tokenCounter.CountTokens("[") +
231-
tokenCounter.CountTokens(role) +
232-
tokenCounter.CountTokens("] ") +
233-
tokenCounter.CountTokens(content) +
234-
tokenCounter.CountTokens("\n")
235-
: tokenCounter.CountTokens("[") +
236-
tokenCounter.CountTokens(author!) +
237-
tokenCounter.CountTokens(" (") +
238-
tokenCounter.CountTokens(role) +
239-
tokenCounter.CountTokens(")] ") +
240-
tokenCounter.CountTokens(content) +
241-
tokenCounter.CountTokens("\n");
242-
243-
if (tokenCount > tokenBudget)
244-
{
245-
return new ValueTask<(bool, int)>((false, tokenBudget));
246-
}
247-
else
248-
{
249-
return new ValueTask<(bool, int)>((true, tokenBudget - tokenCount));
250-
}
251-
}
252-
25385
/// <summary>
25486
/// Renders the supplied <paramref name="response"/> to a string that can be included as part of the evaluation
25587
/// prompt that this <see cref="IEvaluator"/> uses.
@@ -351,8 +183,8 @@ protected abstract ValueTask<string> RenderEvaluationPromptAsync(
351183
/// <see cref="EvaluationMetric"/>s in the supplied <paramref name="result"/>.
352184
/// </summary>
353185
/// <param name="chatConfiguration">
354-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
355-
/// <see cref="IEvaluationTokenCounter"/> that this <see cref="IEvaluator"/> uses to perform the evaluation.
186+
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that should be used if one or
187+
/// more composed <see cref="IEvaluator"/>s use an AI model to perform evaluation.
356188
/// </param>
357189
/// <param name="evaluationMessages">
358190
/// The set of messages that are to be sent to the supplied <see cref="ChatConfiguration.ChatClient"/> to perform

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/Storage/AzureStorageReportingConfiguration.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,9 @@ public static class AzureStorageReportingConfiguration
2929
/// survive in the cache before they are considered expired and evicted.
3030
/// </param>
3131
/// <param name="chatConfiguration">
32-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
33-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <paramref name="evaluators"/> included in the
34-
/// returned <see cref="ReportingConfiguration"/>. Can be omitted if none of the included
35-
/// <paramref name="evaluators"/> are AI-based.
32+
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by AI-based
33+
/// <paramref name="evaluators"/> included in the returned <see cref="ReportingConfiguration"/>. Can be omitted if
34+
/// none of the included <paramref name="evaluators"/> are AI-based.
3635
/// </param>
3736
/// <param name="enableResponseCaching">
3837
/// <see langword="true"/> to enable caching of AI responses; <see langword="false"/> otherwise.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/ReportingConfiguration.cs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,8 @@ public sealed class ReportingConfiguration
3030
public IResultStore ResultStore { get; }
3131

3232
/// <summary>
33-
/// Gets a <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
34-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <see cref="Evaluators"/> included in this
35-
/// <see cref="ReportingConfiguration"/>.
33+
/// Gets a <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by
34+
/// AI-based <see cref="Evaluators"/> included in this <see cref="ReportingConfiguration"/>.
3635
/// </summary>
3736
public ChatConfiguration? ChatConfiguration { get; }
3837

@@ -103,10 +102,9 @@ public sealed class ReportingConfiguration
103102
/// The <see cref="IResultStore"/> that should be used to persist the <see cref="ScenarioRunResult"/>s.
104103
/// </param>
105104
/// <param name="chatConfiguration">
106-
/// A <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
107-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <paramref name="evaluators"/> included in this
108-
/// <see cref="ReportingConfiguration"/>. Can be omitted if none of the included <paramref name="evaluators"/> are
109-
/// AI-based.
105+
/// A <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by
106+
/// AI-based <paramref name="evaluators"/> included in this <see cref="ReportingConfiguration"/>. Can be omitted if
107+
/// none of the included <paramref name="evaluators"/> are AI-based.
110108
/// </param>
111109
/// <param name="responseCacheProvider">
112110
/// The <see cref="IResponseCacheProvider"/> that should be used to cache AI responses. If omitted, AI responses
@@ -246,7 +244,7 @@ await ResponseCacheProvider.GetCacheAsync(
246244
}
247245
#pragma warning restore CA2000
248246

249-
chatConfiguration = new ChatConfiguration(chatClient, chatConfiguration.TokenCounter);
247+
chatConfiguration = new ChatConfiguration(chatClient);
250248
}
251249

252250
return new ScenarioRun(

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/ScenarioRun.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ public sealed class ScenarioRun : IAsyncDisposable
8080
public string ExecutionName { get; }
8181

8282
/// <summary>
83-
/// Gets a <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
84-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <see cref="IEvaluator"/>s that are invoked as
85-
/// part of the evaluation of this <see cref="ScenarioRun"/>.
83+
/// Gets a <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by
84+
/// AI-based <see cref="IEvaluator"/>s that are invoked as part of the evaluation of this
85+
/// <see cref="ScenarioRun"/>.
8686
/// </summary>
8787
public ChatConfiguration? ChatConfiguration { get; }
8888

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Storage/DiskBasedReportingConfiguration.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ public static class DiskBasedReportingConfiguration
2525
/// The set of <see cref="IEvaluator"/>s that should be invoked to evaluate AI responses.
2626
/// </param>
2727
/// <param name="chatConfiguration">
28-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
29-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <paramref name="evaluators"/> included in the
30-
/// returned <see cref="ReportingConfiguration"/>. Can be omitted if none of the included
31-
/// <paramref name="evaluators"/> are AI-based.
28+
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by AI-based
29+
/// <paramref name="evaluators"/> included in the returned <see cref="ReportingConfiguration"/>. Can be omitted if
30+
/// none of the included <paramref name="evaluators"/> are AI-based.
3231
/// </param>
3332
/// <param name="enableResponseCaching">
3433
/// <see langword="true"/> to enable caching of AI responses; <see langword="false"/> otherwise.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/ContentSafetyServiceConfigurationExtensions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public static ChatConfiguration ToChatConfiguration(
4646
originalChatClient: originalChatConfiguration?.ChatClient);
4747
#pragma warning restore CA2000
4848

49-
return new ChatConfiguration(newChatClient, originalChatConfiguration?.TokenCounter);
49+
return new ChatConfiguration(newChatClient);
5050
}
5151

5252
/// <summary>

src/Libraries/Microsoft.Extensions.AI.Evaluation/ChatConfiguration.cs

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,13 @@
99
namespace Microsoft.Extensions.AI.Evaluation;
1010

1111
/// <summary>
12-
/// Specifies the <see cref="IChatClient"/> and the <see cref="IEvaluationTokenCounter"/> that should be used when
13-
/// evaluation is performed using an AI model.
12+
/// Specifies the <see cref="IChatClient"/> that should be used when evaluation is performed using an AI model.
1413
/// </summary>
1514
/// <param name="chatClient">An <see cref="IChatClient"/> that can be used to communicate with an AI model.</param>
16-
/// <param name="tokenCounter">
17-
/// An <see cref="IEvaluationTokenCounter"/> that can be used to count tokens present in evaluation prompts, or
18-
/// <see langword="null"/> if the AI model / deployment being used does not impose an input token limit.
19-
/// </param>
20-
public sealed class ChatConfiguration(IChatClient chatClient, IEvaluationTokenCounter? tokenCounter = null)
15+
public sealed class ChatConfiguration(IChatClient chatClient)
2116
{
2217
/// <summary>
2318
/// Gets an <see cref="IChatClient"/> that can be used to communicate with an AI model.
2419
/// </summary>
2520
public IChatClient ChatClient { get; } = chatClient;
26-
27-
/// <summary>
28-
/// Gets an <see cref="IEvaluationTokenCounter"/> that can be used to count tokens present in evaluation prompts.
29-
/// </summary>
30-
/// <remarks>
31-
/// <see cref="TokenCounter"/> can be set to <see langword="null"/> if the AI model / deployment being used does
32-
/// not impose an input token limit.
33-
/// </remarks>
34-
public IEvaluationTokenCounter? TokenCounter { get; } = tokenCounter;
3521
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Diagnostics.CodeAnalysis;
7+
using System.Linq;
8+
using Microsoft.ML.Tokenizers;
9+
using Microsoft.Shared.Diagnostics;
10+
11+
namespace Microsoft.Extensions.AI.Evaluation;
12+
13+
/// <summary>
14+
/// Extension methods for <see cref="ChatMessage"/>.
15+
/// </summary>
16+
public static class ChatMessageExtensions
17+
{
18+
/// <summary>
19+
/// Uses the supplied <paramref name="tokenizer"/> to count the number of tokens present in the supplied
20+
/// <paramref name="conversation"/>, and discards the oldest messages (from the beginning of the collection), if
21+
/// necessary, to ensure that the remaining messages fit within the specified <paramref name="tokenBudget"/>.
22+
/// </summary>
23+
/// <remarks>
24+
/// Note that this API only considers the text (i.e., <see cref="TextContent"/>) present in the supplied
25+
/// <paramref name="conversation"/>. It does not consider other modalities such as images or audio.
26+
/// </remarks>
27+
/// <param name="conversation">
28+
/// A collection of <see cref="ChatMessage"/>s representing an LLM conversation history, with the oldest messages
29+
/// appearing towards the beginning of the collection, and the newest ones appearing towards the end.
30+
/// </param>
31+
/// <param name="tokenizer">
32+
/// The <see cref="Tokenizer"/> to be used to count the number of tokens present in each message in the supplied
33+
/// <paramref name="conversation"/>.
34+
/// </param>
35+
/// <param name="tokenBudget">The overall budget for the number of tokens available.</param>
36+
/// <returns>
37+
/// A <see cref="Tuple{T1, T2}"/> that contains the collection of messages that were retained after trimming down
38+
/// the supplied <paramref name="conversation"/> to fit within the specified <paramref name="tokenBudget"/>, as
39+
/// well as an <see langword="int"/> count identifying the remaining number of tokens available after this trimming
40+
/// operation.
41+
/// </returns>
42+
[Experimental("EVAL001")]
43+
public static (IEnumerable<ChatMessage> trimmedConversation, int remainingTokenBudget) Trim(
44+
this IEnumerable<ChatMessage> conversation,
45+
Tokenizer tokenizer,
46+
int tokenBudget)
47+
{
48+
_ = Throw.IfNull(conversation);
49+
_ = Throw.IfNull(tokenizer);
50+
_ = Throw.IfLessThan(tokenBudget, min: 0);
51+
52+
var trimmedConversation = new List<ChatMessage>();
53+
int remainingTokenBudget = tokenBudget;
54+
55+
foreach (ChatMessage message in conversation.Reverse())
56+
{
57+
int tokenCount = tokenizer.CountTokens(message.Text);
58+
int newTokenBudget = remainingTokenBudget - tokenCount;
59+
60+
if (newTokenBudget < 0)
61+
{
62+
break;
63+
}
64+
else
65+
{
66+
remainingTokenBudget = newTokenBudget;
67+
trimmedConversation.Add(message);
68+
}
69+
}
70+
71+
return (trimmedConversation, remainingTokenBudget);
72+
}
73+
}

src/Libraries/Microsoft.Extensions.AI.Evaluation/CompositeEvaluator.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,8 @@ public CompositeEvaluator(IEnumerable<IEvaluator> evaluators)
8888
/// </param>
8989
/// <param name="modelResponse">The response that is to be evaluated.</param>
9090
/// <param name="chatConfiguration">
91-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
92-
/// <see cref="IEvaluationTokenCounter"/> that should be used if one or more composed <see cref="IEvaluator"/>s use
93-
/// an AI model to perform evaluation.
91+
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that should be used if one or
92+
/// more composed <see cref="IEvaluator"/>s use an AI model to perform evaluation.
9493
/// </param>
9594
/// <param name="additionalContext">
9695
/// Additional contextual information (beyond that which is available in <paramref name="messages"/>) that composed

0 commit comments

Comments
 (0)