SciSharp · martindevans · Nov 15, 2023 · Nov 15, 2023
diff --git a/LLama.Web/Common/ModelOptions.cs b/LLama.Web/Common/ModelOptions.cs
@@ -20,7 +20,7 @@ public class ModelOptions
         /// <summary>
         /// Model context size (n_ctx)
         /// </summary>
-        public uint ContextSize { get; set; } = 512;
+        public uint? ContextSize { get; set; }
 
         /// <summary>
         /// the GPU that is used for scratch and small tensors

diff --git a/LLama/Abstractions/IContextParams.cs b/LLama/Abstractions/IContextParams.cs
@@ -11,7 +11,7 @@ public interface IContextParams
     /// <summary>
     /// Model context size (n_ctx)
     /// </summary>
-    uint ContextSize { get; set; }
+    uint? ContextSize { get; set; }
 
     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)

diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
@@ -43,7 +43,7 @@ public FixedSizeQueue(int size)
         /// <param name="data"></param>
         public FixedSizeQueue(int size, IEnumerable<T> data)
         {
-#if !NETSTANDARD2_0 
+#if NET6_0_OR_GREATER
             // Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
             // in which case we'll have to check later
             if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)

diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
@@ -13,92 +13,60 @@ namespace LLama.Common
     public record ModelParams
         : ILLamaParams
     {
-        /// <summary>
-        /// Model context size (n_ctx)
-        /// </summary>
-        public uint ContextSize { get; set; } = 512;
-        /// <summary>
-        /// the GPU that is used for scratch and small tensors
-        /// </summary>
+        /// <inheritdoc />
+        public uint? ContextSize { get; set; }
+
+        /// <inheritdoc />
         public int MainGpu { get; set; } = 0;
 
-        /// <summary>
-        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
-        /// </summary>
+        /// <inheritdoc />
         public int GpuLayerCount { get; set; } = 20;
-        /// <summary>
-        /// Seed for the random number generator (seed)
-        /// </summary>
+
+        /// <inheritdoc />
         public uint Seed { get; set; } = 0xFFFFFFFF;
-        /// <summary>
-        /// Use f16 instead of f32 for memory kv (memory_f16)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool UseFp16Memory { get; set; } = true;
-        /// <summary>
-        /// Use mmap for faster loads (use_mmap)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool UseMemorymap { get; set; } = true;
-        /// <summary>
-        /// Use mlock to keep model in memory (use_mlock)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool UseMemoryLock { get; set; }
-        /// <summary>
-        /// Compute perplexity over the prompt (perplexity)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool Perplexity { get; set; }
-        /// <summary>
-        /// Model path (model)
-        /// </summary>
+
+        /// <inheritdoc />
         public string ModelPath { get; set; }
 
-        /// <summary>
-        /// List of LoRAs to apply
-        /// </summary>
+        /// <inheritdoc />
         public AdapterCollection LoraAdapters { get; set; } = new();
 
-        /// <summary>
-        /// base model path for the lora adapter (lora_base)
-        /// </summary>
+        /// <inheritdoc />
         public string LoraBase { get; set; } = string.Empty;
 
-        /// <summary>
-        /// Number of threads (null = autodetect) (n_threads)
-        /// </summary>
+        /// <inheritdoc />
         public uint? Threads { get; set; }
 
-        /// <summary>
-        /// Number of threads to use for batch processing (null = autodetect) (n_threads)
-        /// </summary>
+        /// <inheritdoc />
         public uint? BatchThreads { get; set; }
 
-        /// <summary>
-        /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
-        /// </summary>
+        /// <inheritdoc />
         public uint BatchSize { get; set; } = 512;
 
-        /// <summary>
-        /// Whether to use embedding mode. (embedding) Note that if this is set to true, 
-        /// The LLamaModel won't produce text response anymore.
-        /// </summary>
+        /// <inheritdoc />
         public bool EmbeddingMode { get; set; }
 
-        /// <summary>
-        /// how split tensors should be distributed across GPUs.
-        /// </summary>
-        /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
+        /// <inheritdoc />
         [JsonConverter(typeof(TensorSplitsCollectionConverter))]
         public TensorSplitsCollection TensorSplits { get; set; } = new();
 
-		/// <summary>
-		/// RoPE base frequency
-		/// </summary>
-		public float? RopeFrequencyBase { get; set; }
-
-		/// <summary>
-		/// RoPE frequency scaling factor
-		/// </summary>
-		public float? RopeFrequencyScale { get; set; }
+        /// <inheritdoc />
+        public float? RopeFrequencyBase { get; set; }
 
+        /// <inheritdoc />
+        public float? RopeFrequencyScale { get; set; }
 
         /// <inheritdoc />
         public float? YarnExtrapolationFactor { get; set; }
@@ -118,20 +86,13 @@ public record ModelParams
         /// <inheritdoc />
         public RopeScalingType? YarnScalingType { get; set; }
 
-        /// <summary>
-        /// Use experimental mul_mat_q kernels
-        /// </summary>
+        /// <inheritdoc />
         public bool MulMatQ { get; set; }
 
-
-        /// <summary>
-        /// Load vocab only (no weights)
-        /// </summary>
+        /// <inheritdoc />
         public bool VocabOnly { get; set; }
 
-        /// <summary>
-        /// The encoding to use to convert text for the model
-        /// </summary>
+        /// <inheritdoc />
         [JsonConverter(typeof(EncodingConverter))]
         public Encoding Encoding { get; set; } = Encoding.UTF8;
 

diff --git a/LLama/Extensions/DictionaryExtensions.cs b/LLama/Extensions/DictionaryExtensions.cs
@@ -9,6 +9,8 @@ public static TValue GetValueOrDefault<TKey, TValue>(this IReadOnlyDictionary<TK
         {
             return GetValueOrDefaultImpl(dictionary, key, defaultValue);
         }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+    #error Target framework not supported!
 #endif
 
         internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)

diff --git a/LLama/Extensions/EncodingExtensions.cs b/LLama/Extensions/EncodingExtensions.cs
@@ -15,6 +15,8 @@ public static int GetCharCount(this Encoding encoding, ReadOnlySpan<byte> bytes)
     {
         return GetCharCountImpl(encoding, bytes);
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+    #error Target framework not supported!
 #endif
 
     internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)

diff --git a/LLama/Extensions/IContextParamsExtensions.cs b/LLama/Extensions/IContextParamsExtensions.cs
@@ -21,7 +21,7 @@ public static class IContextParamsExtensions
         public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
         {
             result = NativeApi.llama_context_default_params();
-            result.n_ctx = @params.ContextSize;
+            result.n_ctx = @params.ContextSize ?? 0;
             result.n_batch = @params.BatchSize;
             result.seed = @params.Seed;
             result.f16_kv = @params.UseFp16Memory;

diff --git a/LLama/Extensions/IEnumerableExtensions.cs b/LLama/Extensions/IEnumerableExtensions.cs
@@ -10,6 +10,8 @@ public static IEnumerable<T> TakeLast<T>(this IEnumerable<T> source, int count)
         {
             return TakeLastImpl(source, count);
         }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+    #error Target framework not supported!
 #endif
 
         internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)

diff --git a/LLama/Extensions/KeyValuePairExtensions.cs b/LLama/Extensions/KeyValuePairExtensions.cs
@@ -19,5 +19,7 @@ public static void Deconstruct<TKey, TValue>(this System.Collections.Generic.Key
         first = pair.Key;
         second = pair.Value;
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+    #error Target framework not supported!
 #endif
 }
diff --git a/LLama/Extensions/ListExtensions.cs b/LLama/Extensions/ListExtensions.cs
@@ -5,7 +5,7 @@ namespace LLama.Extensions
 {
     internal static class ListExtensions
     {
-#if NETSTANDARD2_0
+#if !NET6_0_OR_GREATER
         public static void EnsureCapacity<T>(this List<T> list, int capacity)
         {
             if (list.Capacity < capacity)

diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
@@ -22,7 +22,7 @@ public struct LLamaContextParams
         public uint seed;
 
         /// <summary>
-        /// text context
+        /// text context size (n_ctx), 0 = take the context size from the model
         /// </summary>
         public uint n_ctx;