Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LLama.Web/Common/ModelOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class ModelOptions
/// <summary>
/// Model context size (n_ctx)
/// </summary>
public uint ContextSize { get; set; } = 512;
public uint? ContextSize { get; set; }

/// <summary>
/// The GPU that is used for scratch and small tensors
Expand Down
2 changes: 1 addition & 1 deletion LLama/Abstractions/IContextParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public interface IContextParams
/// <summary>
/// Model context size (n_ctx)
/// </summary>
uint ContextSize { get; set; }
uint? ContextSize { get; set; }

/// <summary>
/// Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
Expand Down
2 changes: 1 addition & 1 deletion LLama/Common/FixedSizeQueue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public FixedSizeQueue(int size)
/// <param name="data"></param>
public FixedSizeQueue(int size, IEnumerable<T> data)
{
#if !NETSTANDARD2_0
#if NET6_0_OR_GREATER
// Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
// in which case we'll have to check later
if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
Expand Down
101 changes: 31 additions & 70 deletions LLama/Common/ModelParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,92 +13,60 @@ namespace LLama.Common
public record ModelParams
: ILLamaParams
{
/// <summary>
/// Model context size (n_ctx)
/// </summary>
public uint ContextSize { get; set; } = 512;
/// <summary>
/// the GPU that is used for scratch and small tensors
/// </summary>
/// <inheritdoc />
public uint? ContextSize { get; set; }

/// <inheritdoc />
public int MainGpu { get; set; } = 0;

/// <summary>
/// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
/// </summary>
/// <inheritdoc />
public int GpuLayerCount { get; set; } = 20;
/// <summary>
/// Seed for the random number generator (seed)
/// </summary>

/// <inheritdoc />
public uint Seed { get; set; } = 0xFFFFFFFF;
/// <summary>
/// Use f16 instead of f32 for memory kv (memory_f16)
/// </summary>

/// <inheritdoc />
public bool UseFp16Memory { get; set; } = true;
/// <summary>
/// Use mmap for faster loads (use_mmap)
/// </summary>

/// <inheritdoc />
public bool UseMemorymap { get; set; } = true;
/// <summary>
/// Use mlock to keep model in memory (use_mlock)
/// </summary>

/// <inheritdoc />
public bool UseMemoryLock { get; set; }
/// <summary>
/// Compute perplexity over the prompt (perplexity)
/// </summary>

/// <inheritdoc />
public bool Perplexity { get; set; }
/// <summary>
/// Model path (model)
/// </summary>

/// <inheritdoc />
public string ModelPath { get; set; }

/// <summary>
/// List of LoRAs to apply
/// </summary>
/// <inheritdoc />
public AdapterCollection LoraAdapters { get; set; } = new();

/// <summary>
/// base model path for the lora adapter (lora_base)
/// </summary>
/// <inheritdoc />
public string LoraBase { get; set; } = string.Empty;

/// <summary>
/// Number of threads (null = autodetect) (n_threads)
/// </summary>
/// <inheritdoc />
public uint? Threads { get; set; }

/// <summary>
/// Number of threads to use for batch processing (null = autodetect) (n_threads)
/// </summary>
/// <inheritdoc />
public uint? BatchThreads { get; set; }

/// <summary>
/// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
/// </summary>
/// <inheritdoc />
public uint BatchSize { get; set; } = 512;

/// <summary>
/// Whether to use embedding mode. (embedding) Note that if this is set to true,
/// The LLamaModel won't produce text response anymore.
/// </summary>
/// <inheritdoc />
public bool EmbeddingMode { get; set; }

/// <summary>
/// how split tensors should be distributed across GPUs.
/// </summary>
/// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
/// <inheritdoc />
[JsonConverter(typeof(TensorSplitsCollectionConverter))]
public TensorSplitsCollection TensorSplits { get; set; } = new();

/// <summary>
/// RoPE base frequency
/// </summary>
public float? RopeFrequencyBase { get; set; }

/// <summary>
/// RoPE frequency scaling factor
/// </summary>
public float? RopeFrequencyScale { get; set; }
/// <inheritdoc />
public float? RopeFrequencyBase { get; set; }

/// <inheritdoc />
public float? RopeFrequencyScale { get; set; }

/// <inheritdoc />
public float? YarnExtrapolationFactor { get; set; }
Expand All @@ -118,20 +86,13 @@ public record ModelParams
/// <inheritdoc />
public RopeScalingType? YarnScalingType { get; set; }

/// <summary>
/// Use experimental mul_mat_q kernels
/// </summary>
/// <inheritdoc />
public bool MulMatQ { get; set; }


/// <summary>
/// Load vocab only (no weights)
/// </summary>
/// <inheritdoc />
public bool VocabOnly { get; set; }

/// <summary>
/// The encoding to use to convert text for the model
/// </summary>
/// <inheritdoc />
[JsonConverter(typeof(EncodingConverter))]
public Encoding Encoding { get; set; } = Encoding.UTF8;

Expand Down
2 changes: 2 additions & 0 deletions LLama/Extensions/DictionaryExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ public static TValue GetValueOrDefault<TKey, TValue>(this IReadOnlyDictionary<TK
{
return GetValueOrDefaultImpl(dictionary, key, defaultValue);
}
#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
#error Target framework not supported!
#endif

internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
Expand Down
2 changes: 2 additions & 0 deletions LLama/Extensions/EncodingExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ public static int GetCharCount(this Encoding encoding, ReadOnlySpan<byte> bytes)
{
return GetCharCountImpl(encoding, bytes);
}
#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
#error Target framework not supported!
#endif

internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)
Expand Down
2 changes: 1 addition & 1 deletion LLama/Extensions/IContextParamsExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public static class IContextParamsExtensions
public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
{
result = NativeApi.llama_context_default_params();
result.n_ctx = @params.ContextSize;
result.n_ctx = @params.ContextSize ?? 0;
result.n_batch = @params.BatchSize;
result.seed = @params.Seed;
result.f16_kv = @params.UseFp16Memory;
Expand Down
2 changes: 2 additions & 0 deletions LLama/Extensions/IEnumerableExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ public static IEnumerable<T> TakeLast<T>(this IEnumerable<T> source, int count)
{
return TakeLastImpl(source, count);
}
#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
#error Target framework not supported!
#endif

internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)
Expand Down
2 changes: 2 additions & 0 deletions LLama/Extensions/KeyValuePairExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@ public static void Deconstruct<TKey, TValue>(this System.Collections.Generic.Key
first = pair.Key;
second = pair.Value;
}
#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
#error Target framework not supported!
#endif
}
2 changes: 1 addition & 1 deletion LLama/Extensions/ListExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace LLama.Extensions
{
internal static class ListExtensions
{
#if NETSTANDARD2_0
#if !NET6_0_OR_GREATER
public static void EnsureCapacity<T>(this List<T> list, int capacity)
{
if (list.Capacity < capacity)
Expand Down
2 changes: 1 addition & 1 deletion LLama/Native/LLamaContextParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public struct LLamaContextParams
public uint seed;

/// <summary>
/// text context
/// Text context size (n_ctx); 0 = take the context size from the model
/// </summary>
public uint n_ctx;

Expand Down