@@ -265,6 +265,8 @@ extern "C" {
         bool check_tensors; // validate model tensor data
     };

+    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+    //       https://github.com/ggerganov/llama.cpp/pull/7544
     struct llama_context_params {
         uint32_t seed;  // RNG seed, -1 for random
         uint32_t n_ctx; // text context, 0 = from model
@@ -291,14 +293,14 @@ extern "C" {
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;

-        enum ggml_type type_k; // data type for K cache
-        enum ggml_type type_v; // data type for V cache
+        enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+        enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embeddings;  // if true, extract embeddings (together with logits)
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
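For context, a minimal sketch (not part of this diff) of how a caller might opt into the fields now marked [EXPERIMENTAL]. It assumes the existing llama.cpp C API entry points `llama_context_default_params()` and `llama_new_context_with_model()`, plus the `GGML_TYPE_Q8_0` enum from ggml, none of which are shown in this hunk; the defaults remain the safe choice.

```c
// Sketch only: exercises the [EXPERIMENTAL] fields of llama_context_params.
#include "llama.h"

struct llama_context * make_ctx(struct llama_model * model) {
    struct llama_context_params params = llama_context_default_params();

    params.n_ctx      = 4096;            // text context size
    params.flash_attn = true;            // [EXPERIMENTAL] enable flash attention
    params.type_k     = GGML_TYPE_Q8_0;  // [EXPERIMENTAL] quantized K cache
    params.type_v     = GGML_TYPE_Q8_0;  // [EXPERIMENTAL] quantized V cache

    // Per the NOTE above, non-default values may crash or give incorrect
    // results on some backends/configurations; check for a NULL context.
    return llama_new_context_with_model(model, params);
}
```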