From 045d6d7110ff6ae753603d379b586e6c128a059a Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Fri, 10 May 2024 19:19:24 +0100 Subject: [PATCH] Rewritten some examples docs, explaining what these examples show instead of just showing the source code. --- docs/Examples/BatchedExecutorFork.md | 196 ++++++----------------- docs/Examples/BatchedExecutorGuidance.md | 131 +-------------- docs/Examples/BatchedExecutorRewind.md | 122 +------------- 3 files changed, 55 insertions(+), 394 deletions(-) diff --git a/docs/Examples/BatchedExecutorFork.md b/docs/Examples/BatchedExecutorFork.md index ad391dd1c..8ec4887b3 100644 --- a/docs/Examples/BatchedExecutorFork.md +++ b/docs/Examples/BatchedExecutorFork.md @@ -1,148 +1,48 @@ -# Bacthed executor - multi-output to one input - -```cs -using LLama.Batched; -using LLama.Common; -using LLama.Native; -using LLama.Sampling; -using Spectre.Console; - -namespace LLama.Examples.Examples; - -/// -/// This demonstrates generating multiple replies to the same prompt, with a shared cache -/// -public class BatchedExecutorFork -{ - private const int n_split = 16; - private const int n_len = 72; - - public static async Task Run() - { - string modelPath = UserSettings.GetModelPath(); - - var parameters = new ModelParams(modelPath); - using var model = LLamaWeights.LoadFromFile(parameters); - - var prompt = AnsiConsole.Ask("Prompt (or ENTER for default):", "Not many people know that"); - - // Create an executor that can evaluate a batch of conversations together - using var executor = new BatchedExecutor(model, parameters); - - // Print some info - var name = executor.Model.Metadata.GetValueOrDefault("general.name", "unknown model name"); - Console.WriteLine($"Created executor with model: {name}"); - - // Evaluate the initial prompt to create one conversation - using var start = executor.Create(); - start.Prompt(prompt); - await executor.Infer(); - - // Create the root node of the tree - var root = new Node(start); - - await AnsiConsole - .Progress() - .StartAsync(async progress => - { - var reporter = progress.AddTask("Running Inference (1)", maxValue: n_len); - - // Run inference loop - for (var i = 0; i < n_len; i++) - { - if (i != 0) - await executor.Infer(); - - // Occasionally fork all the active conversations - if (i != 0 && i % n_split == 0) - root.Split(); - - // Sample all active conversations - root.Sample(); - - // Update progress bar - reporter.Increment(1); - reporter.Description($"Running Inference ({root.ActiveConversationCount})"); - } - - // Display results - var display = new Tree(prompt); - root.Display(display); - AnsiConsole.Write(display); - }); - } - - private class Node - { - private readonly StreamingTokenDecoder _decoder; - - private readonly DefaultSamplingPipeline _sampler; - private Conversation? _conversation; - - private Node? _left; - private Node? _right; - - public int ActiveConversationCount => _conversation != null ? 
1 : _left!.ActiveConversationCount + _right!.ActiveConversationCount; - - public Node(Conversation conversation) - { - _sampler = new DefaultSamplingPipeline(); - _conversation = conversation; - _decoder = new StreamingTokenDecoder(conversation.Executor.Context); - } - - public void Sample() - { - if (_conversation == null) - { - _left?.Sample(); - _right?.Sample(); - return; - } - - if (_conversation.RequiresInference) - return; - - // Sample one token - var ctx = _conversation.Executor.Context.NativeHandle; - var token = _sampler.Sample(ctx, _conversation.Sample(), Array.Empty()); - _sampler.Accept(ctx, token); - _decoder.Add(token); - - // Prompt the conversation with this token, to continue generating from there - _conversation.Prompt(token); - } - - public void Split() - { - if (_conversation != null) - { - _left = new Node(_conversation.Fork()); - _right = new Node(_conversation.Fork()); - - _conversation.Dispose(); - _conversation = null; - } - else - { - _left?.Split(); - _right?.Split(); - } - } - - public void Display(T tree, int depth = 0) - where T : IHasTreeNodes - { - var colors = new[] { "red", "green", "blue", "yellow", "white" }; - var color = colors[depth % colors.Length]; - - var message = Markup.Escape(_decoder.Read().ReplaceLineEndings("")); - - var n = tree.AddNode($"[{color}]{message}[/]"); - - _left?.Display(n, depth + 1); - _right?.Display(n, depth + 1); - } - } -} -``` \ No newline at end of file +# BatchedExecutor Fork - Generate Multiple Completions With Shared Memory + +This example demonstrates using the `BatchedExecutor` to split one sequence into multiple sequences. See the source code [here](https://github.com/SciSharp/LLamaSharp/blob/master/LLama.Examples/Examples/BatchedExecutorFork.cs). + +Sequences share memory up to the point they were split, meaning no extra memory is consumed by creating a fork. Inference runs for all sequences simultaneously, this means that running two sequences does _not_ take twice as much time as running one. + +An example output, starting with the prompt `Not many people know that`: + +``` +Not many people know that +└── , in the 17th century, a military band led by Captain Charles + ├── Bossler of Baden, Germany, composed and played a music suite titled + │ ├── the "Civil Psalm," in order to rally German Protestants during + │ │ ├── the Thirty Years' War. This tune became popular among German soldiers, + │ │ │ ├── and its popularity continued long after the war + │ │ │ └── and, eventually, reached France. The + │ │ └── the Thirty Years' War.This music, with its clear call + │ │ ├── to arms and strong Christian themes, helped + │ │ └── to arms and unwavering belief + │ └── "Baden's First National Symphony," with lyrics by a young Wol + │ ├── fgang Amadeus Mozart. The story of the composition's creation + │ │ ├── has long been forgotten. But the B + │ │ └── was popularized by a novelty book + │ └── fgang Amadeus Mozart. It's said that this music brought + │ ├── peace to Europe, at least for a + │ └── the troops together during difficult times. It + └── Newdick played a mournful dirge to accompany the procession of + ├── the head of King Charles I. It is the scene that opens my latest book + │ ├── , "Death and Taxes." The book follows a British army captain named + │ │ ├── Marcus as he seeks revenge for his wife + │ │ └── William Darnay who becomes involved in + │ └── , A King, A Pawn and a Prince. 
The murder of the king
+        │       ├── and the civil war that followed are the
+        │       └── is a watershed moment in the political
+        └── the coffin of William Shakespeare, as it was carried to its final resting place
+            ├── . That is the least that can be said for a man who is often regarded
+            │   ├── as the greatest writer in the English language
+            │   └── as the greatest writer the English language has
+            └── at Stratford-upon-Avon. Shakespeare, of course
+                ├── , was a famous English poet and play
+                └── , was one of the greatest playwright
+```
+
+Forked sequences can be used for many purposes, for example:
+ - Evaluating the system prompt once and forking for each independent conversation.
+ - Saving a "checkpoint" in a conversation to return to later.
+ - Beam Search.
+ - Splitting a conversation, generating completions from several different "agents", and taking the best response.
\ No newline at end of file
diff --git a/docs/Examples/BatchedExecutorGuidance.md b/docs/Examples/BatchedExecutorGuidance.md
index 94d0ef867..99912ae40 100644
--- a/docs/Examples/BatchedExecutorGuidance.md
+++ b/docs/Examples/BatchedExecutorGuidance.md
@@ -1,130 +1,7 @@
-# Batched executor - basic guidance
+# BatchedExecutor Guidance - Classifier Free Guidance / Negative Prompting
 
-```cs
-using LLama.Batched;
-using LLama.Common;
-using LLama.Native;
-using LLama.Sampling;
-using Spectre.Console;
+This example demonstrates using `Classifier Free Guidance` (a.k.a. negative prompting) with a custom sampling pipeline. Negative prompting is a way of steering the model output away from certain topics. See the source code [here](https://github.com/SciSharp/LLamaSharp/blob/master/LLama.Examples/Examples/BatchedExecutorGuidance.cs).
 
-namespace LLama.Examples.Examples;
+Two conversations are created. The `guided` conversation starts with the prompt that should be completed, whose completion is shown as the output, for example `"my favourite colour is"`. The `guidance` conversation starts with the negative prompt, for example `"I hate the colour red. My favourite colour is"`. Note that this is a _negative_ prompt, so the guidance will steer the model towards answering as if it _likes_ the colour red.
 
-/// 
-/// This demonstrates using a batch to generate two sequences and then using one
-/// sequence as the negative guidance ("classifier free guidance") for the other.
-/// 
-public class BatchedExecutorGuidance
-{
-    private const int n_len = 32;
-
-    public static async Task Run()
-    {
-        string modelPath = UserSettings.GetModelPath();
-
-        var parameters = new ModelParams(modelPath);
-        using var model = LLamaWeights.LoadFromFile(parameters);
-
-        var positivePrompt = AnsiConsole.Ask("Positive Prompt (or ENTER for default):", "My favourite colour is").Trim();
-        var negativePrompt = AnsiConsole.Ask("Negative Prompt (or ENTER for default):", "I hate the colour red. 
My favourite colour is").Trim(); - var weight = AnsiConsole.Ask("Guidance Weight (or ENTER for default):", 2.0f); - - // Create an executor that can evaluate a batch of conversations together - using var executor = new BatchedExecutor(model, parameters); - - // Print some info - var name = executor.Model.Metadata.GetValueOrDefault("general.name", "unknown model name"); - Console.WriteLine($"Created executor with model: {name}"); - - // Load the two prompts into two conversations - using var guided = executor.Create(); - guided.Prompt(positivePrompt); - using var guidance = executor.Create(); - guidance.Prompt(negativePrompt); - - // Run inference to evaluate prompts - await AnsiConsole - .Status() - .Spinner(Spinner.Known.Line) - .StartAsync("Evaluating Prompts...", _ => executor.Infer()); - - // Fork the "guided" conversation. We'll run this one without guidance for comparison - using var unguided = guided.Fork(); - - // Run inference loop - var unguidedSampler = new GuidedSampler(null, weight); - var unguidedDecoder = new StreamingTokenDecoder(executor.Context); - var guidedSampler = new GuidedSampler(guidance, weight); - var guidedDecoder = new StreamingTokenDecoder(executor.Context); - await AnsiConsole - .Progress() - .StartAsync(async progress => - { - var reporter = progress.AddTask("Running Inference", maxValue: n_len); - - for (var i = 0; i < n_len; i++) - { - if (i != 0) - await executor.Infer(); - - // Sample from the "unguided" conversation. This is just a conversation using the same prompt, without any - // guidance. This serves as a comparison to show the effect of guidance. - var u = unguidedSampler.Sample(executor.Context.NativeHandle, unguided.Sample(), Array.Empty()); - unguidedDecoder.Add(u); - unguided.Prompt(u); - - // Sample from the "guided" conversation. This sampler will internally use the "guidance" conversation - // to steer the conversation. See how this is done in GuidedSampler.ProcessLogits (bottom of this file). - var g = guidedSampler.Sample(executor.Context.NativeHandle, guided.Sample(), Array.Empty()); - guidedDecoder.Add(g); - - // Use this token to advance both guided _and_ guidance. Keeping them in sync (except for the initial prompt). - guided.Prompt(g); - guidance.Prompt(g); - - // Early exit if we reach the natural end of the guided sentence - if (g == model.EndOfSentenceToken) - break; - - // Update progress bar - reporter.Increment(1); - } - }); - - AnsiConsole.MarkupLine($"[green]Unguided:[/][white]{unguidedDecoder.Read().ReplaceLineEndings(" ")}[/]"); - AnsiConsole.MarkupLine($"[green]Guided:[/][white]{guidedDecoder.Read().ReplaceLineEndings(" ")}[/]"); - } - - private class GuidedSampler(Conversation? 
guidance, float weight)
-        : BaseSamplingPipeline
-    {
-        public override void Accept(SafeLLamaContextHandle ctx, LLamaToken token)
-        {
-        }
-
-        public override ISamplingPipeline Clone()
-        {
-            throw new NotSupportedException();
-        }
-
-        protected override void ProcessLogits(SafeLLamaContextHandle ctx, Span logits, ReadOnlySpan lastTokens)
-        {
-            if (guidance == null)
-                return;
-
-            // Get the logits generated by the guidance sequences
-            var guidanceLogits = guidance.Sample();
-
-            // Use those logits to guide this sequence
-            NativeApi.llama_sample_apply_guidance(ctx, logits, guidanceLogits, weight);
-        }
-
-        protected override LLamaToken ProcessTokenDataArray(SafeLLamaContextHandle ctx, LLamaTokenDataArray candidates, ReadOnlySpan lastTokens)
-        {
-            candidates.Temperature(ctx, 0.8f);
-            candidates.TopK(ctx, 25);
-
-            return candidates.SampleToken(ctx);
-        }
-    }
-}
-```
\ No newline at end of file
+A custom sampler samples the `guidance` conversation and uses that output to influence the output of the `guided` conversation. Once a token is selected, _both_ conversations are continued with this token, keeping the two sequences in sync.
\ No newline at end of file
diff --git a/docs/Examples/BatchedExecutorRewind.md b/docs/Examples/BatchedExecutorRewind.md
index 06287b7c9..78c480c77 100644
--- a/docs/Examples/BatchedExecutorRewind.md
+++ b/docs/Examples/BatchedExecutorRewind.md
@@ -1,121 +1,5 @@
-# Batched executor - rewinding to an earlier state
+# BatchedExecutor - Rewind
 
-```cs
-using LLama.Batched;
-using LLama.Common;
-using LLama.Native;
-using LLama.Sampling;
-using Spectre.Console;
+This example demonstrates using the `BatchedExecutor` to generate tokens and then rewind the conversation back to an earlier state. See the source code [here](https://github.com/SciSharp/LLamaSharp/blob/master/LLama.Examples/Examples/BatchedExecutorRewind.cs).
 
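+For orientation, the core `BatchedExecutor` flow that this example (and the other batched examples) builds on looks roughly like the sketch below. This is a condensed, illustrative outline based on the example source rather than a drop-in snippet: `modelPath` and the prompt are placeholders, the loop is assumed to run inside an async method, and exact method signatures may vary between LLamaSharp versions.
+
+```cs
+using LLama;
+using LLama.Batched;
+using LLama.Common;
+using LLama.Sampling;
+
+// Load the model and create an executor that can evaluate a batch of conversations together
+var parameters = new ModelParams(modelPath);
+using var model = LLamaWeights.LoadFromFile(parameters);
+using var executor = new BatchedExecutor(model, parameters);
+
+// Start a conversation by prompting it
+using var conversation = executor.Create();
+conversation.Prompt("Not many people know that");
+
+var sampler = new DefaultSamplingPipeline();
+var decoder = new StreamingTokenDecoder(executor.Context);
+
+for (var i = 0; i < 24; i++)
+{
+    // Evaluate all pending conversations in one batch
+    await executor.Infer();
+
+    // Sample the next token for this conversation and feed it back in to continue generating
+    var token = sampler.Sample(executor.Context.NativeHandle, conversation.Sample(), Array.Empty<LLamaToken>());
+    decoder.Add(token);
+    conversation.Prompt(token);
+}
+
+Console.WriteLine(decoder.Read());
+```
+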
-namespace LLama.Examples.Examples;
-
-/// 
-/// This demonstrates generating tokens and then rewinding to an earlier state
-/// 
-public class BatchedExecutorRewind
-{
-    private const int n_generate = 24;
-    private const int n_rewind = 12;
-    private const int n_repeats = 6;
-
-    public static async Task Run()
-    {
-        string modelPath = UserSettings.GetModelPath();
-
-        var parameters = new ModelParams(modelPath);
-        using var model = LLamaWeights.LoadFromFile(parameters);
-
-        var prompt = AnsiConsole.Ask("Prompt (or ENTER for default):", "Not many people know that");
-
-        // Create an executor that can evaluate a batch of conversations together
-        using var executor = new BatchedExecutor(model, parameters);
-
-        // Print some info
-        var name = executor.Model.Metadata.GetValueOrDefault("general.name", "unknown model name");
-        Console.WriteLine($"Created executor with model: {name}");
-
-        // Evaluate the initial prompt to create one conversation
-        using var conversation = executor.Create();
-        conversation.Prompt(prompt);
-
-        // Create the start node wrapping the conversation
-        var node = new Node(executor.Context);
-
-        // Print the prompt
-        Console.ForegroundColor = ConsoleColor.Green;
-        Console.WriteLine(prompt);
-
-        for (var i = 0; i < n_repeats; i++)
-        {
-            for (var j = 0; j < n_generate; j++)
-            {
-                // Run inference
-                await executor.Infer();
-
-                // Sample a token
-                var token = node.Sample(conversation);
-
-                // Continue conversation with this token
-                if (j != n_generate - 1)
-                    conversation.Prompt(token);
-            }
-
-            // Write out what we generated
-            node.Write(n_rewind, i + 1);
-
-            // Rewind back a few tokens
-            conversation.Rewind(n_rewind + 1);
-
-            // Prompt with a token
-            conversation.Prompt(node.GetToken(n_generate - n_rewind - 1));
-
-            // Create a new node around the rewound conversation
-            node = new Node(executor.Context);
-        }
-
-        Console.WriteLine("Press any key to exit demo");
-        Console.ReadKey(true);
-    }
-
-    private class Node
-    {
-        private readonly LLamaContext _context;
-
-        private readonly List _tokens = new List();
-        private readonly DefaultSamplingPipeline Sampler;
-
-        public Node(LLamaContext context)
-        {
-            _context = context;
-            Sampler = new DefaultSamplingPipeline();
-        }
-
-        public LLamaToken Sample(Conversation conversation)
-        {
-            var token = Sampler.Sample(_context.NativeHandle, conversation.Sample(), Array.Empty());
-            _tokens.Add(token);
-            return token;
-        }
-
-        public void Write(int n_rewind, int depth)
-        {
-            var decoder = new StreamingTokenDecoder(_context);
-
-            for (var i = 0; i < _tokens.Count - n_rewind; i++)
-                decoder.Add(_tokens[i]);
-
-            AnsiConsole.MarkupLine($"[green]{new string(' ', depth * 3) + decoder.Read().ReplaceLineEndings(" ")}[/]");
-
-            for (var i = _tokens.Count - n_rewind; i < _tokens.Count; i++)
-                decoder.Add(_tokens[i]);
-
-            AnsiConsole.MarkupLine($"[maroon]{decoder.Read().ReplaceLineEndings(" ")}[/]");
-        }
-
-        public LLamaToken GetToken(int index)
-        {
-            return _tokens[index];
-        }
-    }
-}
-```
\ No newline at end of file
+A single conversation is prompted and then continued for 24 tokens. After that it is rewound by 12 tokens and continued again from that earlier point, and this generate-and-rewind cycle is repeated several times. Rewinding simply sets the conversation back to an earlier state and requires no extra computation.
\ No newline at end of file
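+
+Continuing from the loop sketched above, the rewind step itself looks roughly like the sketch below. This is again an illustrative outline based on the example source: `nGenerate`, `nRewind` and `tokens` are placeholder names, and `executor`, `conversation` and `sampler` are the objects created in the earlier sketch.
+
+```cs
+const int nGenerate = 24;
+const int nRewind = 12;
+var tokens = new List<LLamaToken>();
+
+// Generate a block of tokens, remembering each one so we can return to an earlier point later
+for (var i = 0; i < nGenerate; i++)
+{
+    await executor.Infer();
+
+    var token = sampler.Sample(executor.Context.NativeHandle, conversation.Sample(), Array.Empty<LLamaToken>());
+    tokens.Add(token);
+
+    // Continue the conversation with this token (as in the example, the final token is not added)
+    if (i != nGenerate - 1)
+        conversation.Prompt(token);
+}
+
+// Rewind the conversation back a few tokens - this only moves its state back, nothing is re-evaluated
+conversation.Rewind(nRewind + 1);
+
+// Prompt with the token at the rewind point, so generation picks up (and may diverge) from there
+conversation.Prompt(tokens[nGenerate - nRewind - 1]);
+```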